From 33feb2c4f3c1c030a672d764338afbbc72146302 Mon Sep 17 00:00:00 2001 From: Mark Hillebrand Date: Tue, 10 May 2016 17:56:00 +0200 Subject: [PATCH 01/51] .cntk: remove obsolete useValidation= parameter --- Examples/Speech/Miscellaneous/AMI/cntk_config/Align.cntk | 1 - Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK.cntk | 4 ---- Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2.cntk | 1 - Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_dnn.cntk | 1 - .../Speech/Miscellaneous/AMI/cntk_config/CNTK2_lstmp.cntk | 1 - .../Miscellaneous/AMI/cntk_config/CNTK2_lstmp_smbr.cntk | 1 - Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_smbr.cntk | 1 - .../Speech/Miscellaneous/AMI/cntk_config/CNTK2_write.cntk | 1 - Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK_write.cntk | 1 - Examples/Speech/Miscellaneous/TIMIT/CPU/TIMIT_LSTM.cntk | 1 - Examples/Speech/Miscellaneous/TIMIT/GPU/TIMIT_LSTM.cntk | 1 - .../Speech/Miscellaneous/TIMIT/config/TIMIT_TrainLSTM.cntk | 3 +-- .../Text/PennTreebank/AdditionalFiles/RNNLM/CPU/fnnlm.cntk | 4 +--- .../Text/PennTreebank/AdditionalFiles/RNNLM/CPU/rnnlm.cntk | 4 +--- .../Text/PennTreebank/AdditionalFiles/RNNLM/GPU/fnnlm.cntk | 4 +--- .../Text/PennTreebank/AdditionalFiles/RNNLM/GPU/rnnlm.cntk | 4 +--- .../PennTreebank/AdditionalFiles/RNNLM/GPU/rnnlm.gpu.cntk | 4 +--- Tests/EndToEndTests/LM/RNNLM/CPU/rnnlm.cntk | 4 +--- Tests/EndToEndTests/LM/RNNLM/GPU/rnnlm.cntk | 4 +--- Tests/EndToEndTests/Speech/DNN/WriteCommand/cntk.cntk | 1 - 20 files changed, 8 insertions(+), 38 deletions(-) diff --git a/Examples/Speech/Miscellaneous/AMI/cntk_config/Align.cntk b/Examples/Speech/Miscellaneous/AMI/cntk_config/Align.cntk index f54cd953f..0f0d24f67 100644 --- a/Examples/Speech/Miscellaneous/AMI/cntk_config/Align.cntk +++ b/Examples/Speech/Miscellaneous/AMI/cntk_config/Align.cntk @@ -10,7 +10,6 @@ write=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true printValues=true diff --git a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK.cntk b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK.cntk index 9889d7216..bb229d723 100644 --- a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK.cntk +++ b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK.cntk @@ -12,7 +12,6 @@ TrainDNN=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true NDLNetworkBuilder=[ ndlMacros=$NdlDir$/default_macros.ndl @@ -98,7 +97,6 @@ TrainLSTM=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true NDLNetworkBuilder=[ ndlMacros=$NdlDir$/default_macros.ndl networkDescription=$ndlfile$ @@ -183,7 +181,6 @@ TrainPACRNN=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true NDLNetworkBuilder=[ # ndlMacros=$NdlDir$/default_macros.ndl @@ -286,7 +283,6 @@ write=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true printValues=true diff --git a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2.cntk b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2.cntk index 94f5fb4b1..8c37e9922 100644 --- a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2.cntk +++ b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2.cntk @@ -12,7 +12,6 @@ TrainDNN=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true NDLNetworkBuilder=[ ndlMacros=$NdlDir$/default_macros.ndl diff --git a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_dnn.cntk 
b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_dnn.cntk index 799d3a395..24452722c 100644 --- a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_dnn.cntk +++ b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_dnn.cntk @@ -10,7 +10,6 @@ TrainModel=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true NDLNetworkBuilder=[ ndlMacros=$NdlDir$/default_macros.ndl networkDescription=$NdlDir$/model.ndl diff --git a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_lstmp.cntk b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_lstmp.cntk index 619f9df97..526a1010e 100644 --- a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_lstmp.cntk +++ b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_lstmp.cntk @@ -11,7 +11,6 @@ TrainModel=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true NDLNetworkBuilder=[ ndlMacros=$NdlDir$/default_macros.ndl networkDescription=$NdlDir$/model.ndl diff --git a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_lstmp_smbr.cntk b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_lstmp_smbr.cntk index 4f174bb4f..f687269d8 100644 --- a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_lstmp_smbr.cntk +++ b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_lstmp_smbr.cntk @@ -20,7 +20,6 @@ TrainModel=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true NDLNetworkBuilder=[ ndlMacros=$NdlDir$/default_macros.ndl diff --git a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_smbr.cntk b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_smbr.cntk index b8ce86603..273ac72dc 100644 --- a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_smbr.cntk +++ b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_smbr.cntk @@ -19,7 +19,6 @@ TrainModel=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true NDLNetworkBuilder=[ ndlMacros=$NdlDir$/default_macros.ndl diff --git a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_write.cntk b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_write.cntk index f4eb4b92a..83a912a71 100644 --- a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_write.cntk +++ b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK2_write.cntk @@ -12,7 +12,6 @@ write=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true printValues=true diff --git a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK_write.cntk b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK_write.cntk index c8ed76e43..c41719732 100644 --- a/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK_write.cntk +++ b/Examples/Speech/Miscellaneous/AMI/cntk_config/CNTK_write.cntk @@ -12,7 +12,6 @@ write=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true printValues=true diff --git a/Examples/Speech/Miscellaneous/TIMIT/CPU/TIMIT_LSTM.cntk b/Examples/Speech/Miscellaneous/TIMIT/CPU/TIMIT_LSTM.cntk index b68e4b4b7..c60f8a303 100644 --- a/Examples/Speech/Miscellaneous/TIMIT/CPU/TIMIT_LSTM.cntk +++ b/Examples/Speech/Miscellaneous/TIMIT/CPU/TIMIT_LSTM.cntk @@ -20,7 +20,6 @@ speechTrainNDL=[ deviceId=-1 traceLevel=1 - useValidation=true NDLNetworkBuilder=[ networkDescription=$ConfigFolder$\LSTM_1layer.ndl diff --git a/Examples/Speech/Miscellaneous/TIMIT/GPU/TIMIT_LSTM.cntk b/Examples/Speech/Miscellaneous/TIMIT/GPU/TIMIT_LSTM.cntk index 2e1457260..a7130efe9 100644 --- 
a/Examples/Speech/Miscellaneous/TIMIT/GPU/TIMIT_LSTM.cntk +++ b/Examples/Speech/Miscellaneous/TIMIT/GPU/TIMIT_LSTM.cntk @@ -20,7 +20,6 @@ speechTrainNDL=[ deviceId=0 traceLevel=1 - useValidation=true NDLNetworkBuilder=[ networkDescription=$ConfigFolder$\LSTM_1layer.ndl diff --git a/Examples/Speech/Miscellaneous/TIMIT/config/TIMIT_TrainLSTM.cntk b/Examples/Speech/Miscellaneous/TIMIT/config/TIMIT_TrainLSTM.cntk index 76a5e8bc8..669510686 100644 --- a/Examples/Speech/Miscellaneous/TIMIT/config/TIMIT_TrainLSTM.cntk +++ b/Examples/Speech/Miscellaneous/TIMIT/config/TIMIT_TrainLSTM.cntk @@ -12,7 +12,6 @@ TIMIT_TrainLSTM=[ # deviceId=-1 for CPU, >=0 for GPU devices deviceId=$DeviceNumber$ traceLevel=1 - useValidation=true truncated=true @@ -87,4 +86,4 @@ TIMIT_TrainLSTM=[ labelMappingFile=$MlfDir$/TIMIT.statelist ] ] -] \ No newline at end of file +] diff --git a/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/CPU/fnnlm.cntk b/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/CPU/fnnlm.cntk index bb388b2a0..ba1e9db0a 100644 --- a/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/CPU/fnnlm.cntk +++ b/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/CPU/fnnlm.cntk @@ -40,7 +40,6 @@ deviceId=-1 epochSize=4430000 # which is 886 * 5000 defaultHiddenActivity=0.1 -useValidation=true rnnType=CLASSLM # rnnType=LSTM @@ -307,7 +306,6 @@ test=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.1 - useValidation=true rnnType=CLASSLM modelPath=$ExpFolder$\modelRnnCNTK @@ -410,4 +408,4 @@ test=[ ] ] ] -] \ No newline at end of file +] diff --git a/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/CPU/rnnlm.cntk b/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/CPU/rnnlm.cntk index 21d4ccc5f..3532f5365 100644 --- a/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/CPU/rnnlm.cntk +++ b/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/CPU/rnnlm.cntk @@ -41,7 +41,6 @@ train=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.1 - useValidation=true rnnType=CLASSLM # rnnType=LSTM @@ -308,7 +307,6 @@ test=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.1 - useValidation=true rnnType=CLASSLM modelPath=$ExpFolder$\modelRnnCNTK @@ -411,4 +409,4 @@ test=[ ] ] ] -] \ No newline at end of file +] diff --git a/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/fnnlm.cntk b/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/fnnlm.cntk index a3a53158b..35174b000 100644 --- a/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/fnnlm.cntk +++ b/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/fnnlm.cntk @@ -30,7 +30,6 @@ epochSize=4430000 # which is 886 * 5000 # recurrentLayer=1 defaultHiddenActivity=0.0 -useValidation=true rnnType=CLASSLM # rnnType=LSTM @@ -297,7 +296,6 @@ test=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.1 - useValidation=true rnnType=CLASSLM modelPath=$ExpFolder$\modelRnnCNTK @@ -400,4 +398,4 @@ test=[ ] ] ] -] \ No newline at end of file +] diff --git a/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/rnnlm.cntk b/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/rnnlm.cntk index ade12ad4f..ee9797289 100644 --- a/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/rnnlm.cntk +++ b/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/rnnlm.cntk @@ -31,7 +31,6 @@ epochSize=4430000 # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.0 -useValidation=true rnnType=CLASSLM # rnnType=LSTM @@ -298,7 +297,6 @@ test=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.1 - useValidation=true rnnType=CLASSLM 
modelPath=$ExpFolder$\modelRnnCNTK @@ -401,4 +399,4 @@ test=[ ] ] ] -] \ No newline at end of file +] diff --git a/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/rnnlm.gpu.cntk b/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/rnnlm.gpu.cntk index 65f5994b0..8793e7feb 100644 --- a/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/rnnlm.gpu.cntk +++ b/Examples/Text/PennTreebank/AdditionalFiles/RNNLM/GPU/rnnlm.gpu.cntk @@ -31,7 +31,6 @@ train=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.0 - useValidation=true rnnType=CLASSLM # rnnType=LSTM @@ -297,7 +296,6 @@ test=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.1 - useValidation=true rnnType=CLASSLM modelPath=$ExpFolder$\modelRnnCNTK @@ -400,4 +398,4 @@ test=[ ] ] ] -] \ No newline at end of file +] diff --git a/Tests/EndToEndTests/LM/RNNLM/CPU/rnnlm.cntk b/Tests/EndToEndTests/LM/RNNLM/CPU/rnnlm.cntk index c4a7cab34..53674f053 100644 --- a/Tests/EndToEndTests/LM/RNNLM/CPU/rnnlm.cntk +++ b/Tests/EndToEndTests/LM/RNNLM/CPU/rnnlm.cntk @@ -41,7 +41,6 @@ train=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.1 - useValidation=true rnnType=CLASSLM # rnnType=LSTM @@ -308,7 +307,6 @@ test=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.1 - useValidation=true rnnType=CLASSLM modelPath=$ExpFolder$\modelRnnCNTK @@ -411,4 +409,4 @@ test=[ ] ] ] -] \ No newline at end of file +] diff --git a/Tests/EndToEndTests/LM/RNNLM/GPU/rnnlm.cntk b/Tests/EndToEndTests/LM/RNNLM/GPU/rnnlm.cntk index f8b2150c2..cb2d7d75e 100644 --- a/Tests/EndToEndTests/LM/RNNLM/GPU/rnnlm.cntk +++ b/Tests/EndToEndTests/LM/RNNLM/GPU/rnnlm.cntk @@ -30,7 +30,6 @@ train=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.0 - useValidation=true rnnType=CLASSLM # rnnType=LSTM @@ -297,7 +296,6 @@ test=[ # which is 886 * 5000 recurrentLayer=1 defaultHiddenActivity=0.1 - useValidation=true rnnType=CLASSLM modelPath=$ExpFolder$\modelRnnCNTK @@ -400,4 +398,4 @@ test=[ ] ] ] -] \ No newline at end of file +] diff --git a/Tests/EndToEndTests/Speech/DNN/WriteCommand/cntk.cntk b/Tests/EndToEndTests/Speech/DNN/WriteCommand/cntk.cntk index 333de87ba..2dc560f53 100644 --- a/Tests/EndToEndTests/Speech/DNN/WriteCommand/cntk.cntk +++ b/Tests/EndToEndTests/Speech/DNN/WriteCommand/cntk.cntk @@ -71,7 +71,6 @@ write = [ deviceId = $DeviceId$ traceLevel = 1 - useValidation=true printValues=true From f334cb9b6c66d2ffe4e0bfa3ffeff80a01418938 Mon Sep 17 00:00:00 2001 From: Daniel Merget Date: Wed, 11 May 2016 15:01:23 +0200 Subject: [PATCH 02/51] bugfix: grayscale image now assumed by default if number of channels is 1 --- Source/Readers/ImageReader/ImageConfigHelper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Readers/ImageReader/ImageConfigHelper.cpp b/Source/Readers/ImageReader/ImageConfigHelper.cpp index 81e6de866..8da05b9af 100644 --- a/Source/Readers/ImageReader/ImageConfigHelper.cpp +++ b/Source/Readers/ImageReader/ImageConfigHelper.cpp @@ -77,7 +77,7 @@ ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config) m_mapPath = config(L"file"); - m_grayscale = config(L"grayscale", false); + m_grayscale = config(L"grayscale", c == 1); std::string rand = config(L"randomize", "auto"); if (AreEqualIgnoreCase(rand, "auto")) From f603104050c6a768cce1f3f3ef3fdc31673d2f80 Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Wed, 11 May 2016 11:10:37 -0700 Subject: [PATCH 03/51] Refactored ImageReader config and fixed multiview10 as default bug. 
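This change centralizes crop-type handling: the CropType enum and ParseCropType move from CropTransformer into ImageConfigHelper, the transformers query the shared helper instead of re-parsing the string, and an empty or missing cropType now maps to center cropping ("center", "random", and "multiview10" are the accepted values). For illustration only (not part of this patch), a features section exercising the new parsing could look roughly like the sketch below; the width/height fields and all concrete values are placeholders, and only cropType, cropRatio, and hflip are parameters this series actually touches:

    features = [
        width = 224                # placeholder input dimensions
        height = 224
        channels = 3
        cropType = "random"        # "center" (default when omitted), "random", or "multiview10"
        cropRatio = 0.875          # placeholder; defaults to 1.0
        hflip = true               # defaults to true only when cropType is "random"
    ]

With the refactoring, IsMultiViewCrop() reduces to a check against the parsed enum, so the multi-view code path is taken only when multiview10 is requested explicitly.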
--- .../Readers/ImageReader/ImageConfigHelper.cpp | 23 ++++++++++- .../Readers/ImageReader/ImageConfigHelper.h | 22 +++++++++-- .../Readers/ImageReader/ImageTransformers.cpp | 38 ++++--------------- .../Readers/ImageReader/ImageTransformers.h | 12 ++---- 4 files changed, 51 insertions(+), 44 deletions(-) diff --git a/Source/Readers/ImageReader/ImageConfigHelper.cpp b/Source/Readers/ImageReader/ImageConfigHelper.cpp index 81e6de866..e64da57b1 100644 --- a/Source/Readers/ImageReader/ImageConfigHelper.cpp +++ b/Source/Readers/ImageReader/ImageConfigHelper.cpp @@ -112,7 +112,7 @@ ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config) m_cpuThreadCount = config(L"numCPUThreads", 0); - m_multiViewCrop = AreEqualIgnoreCase((string)featureSection(L"cropType", ""), "multiview10"); + m_cropType = ParseCropType(featureSection(L"cropType", "")); } std::vector ImageConfigHelper::GetStreams() const @@ -136,4 +136,25 @@ std::string ImageConfigHelper::GetMapPath() const { return m_mapPath; } + +CropType ImageConfigHelper::ParseCropType(const std::string &src) +{ + if (src.empty() || AreEqualIgnoreCase(src, "center")) + { + return CropType::Center; + } + + if (AreEqualIgnoreCase(src, "random")) + { + return CropType::Random; + } + + if (AreEqualIgnoreCase(src, "multiview10")) + { + return CropType::MultiView10; + } + + RuntimeError("Invalid crop type: %s.", src.c_str()); +} + }}} diff --git a/Source/Readers/ImageReader/ImageConfigHelper.h b/Source/Readers/ImageReader/ImageConfigHelper.h index 14407eacb..689ce8349 100644 --- a/Source/Readers/ImageReader/ImageConfigHelper.h +++ b/Source/Readers/ImageReader/ImageConfigHelper.h @@ -12,6 +12,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { +enum class CropType +{ + Center = 0, + Random = 1, + MultiView10 = 2 +}; + // A helper class for image specific parameters. // A simple wrapper around CNTK ConfigParameters. 
class ImageConfigHelper @@ -50,23 +57,30 @@ public: { return m_grayscale; } - - bool IsMultiViewCrop() const + + CropType GetCropType() const { - return m_multiViewCrop; + return m_cropType; + } + + bool IsMultiViewCrop() const + { + return m_cropType == CropType::MultiView10; } private: ImageConfigHelper(const ImageConfigHelper&) = delete; ImageConfigHelper& operator=(const ImageConfigHelper&) = delete; + CropType ParseCropType(const std::string &src); + std::string m_mapPath; std::vector m_streams; ImageLayoutKind m_dataFormat; int m_cpuThreadCount; bool m_randomize; - bool m_multiViewCrop; bool m_grayscale; + CropType m_cropType; }; typedef std::shared_ptr ImageConfigHelperPtr; diff --git a/Source/Readers/ImageReader/ImageTransformers.cpp b/Source/Readers/ImageReader/ImageTransformers.cpp index cd7ebaeee..271e99575 100644 --- a/Source/Readers/ImageReader/ImageTransformers.cpp +++ b/Source/Readers/ImageReader/ImageTransformers.cpp @@ -10,7 +10,6 @@ #include "ImageTransformers.h" #include "Config.h" #include "ConcStack.h" -#include "ImageConfigHelper.h" #include "StringUtil.h" #include "ElementTypeUtils.h" @@ -28,10 +27,11 @@ void ImageTransformerBase::Initialize(TransformerPtr next, const ConfigParameters &readerConfig) { Base::Initialize(next, readerConfig); + m_imageConfig = std::make_unique(readerConfig); + m_seed = readerConfig(L"seed", (unsigned int)0); - ImageConfigHelper config(readerConfig); - size_t featureStreamId = config.GetFeatureStreamId(); + size_t featureStreamId = m_imageConfig->GetFeatureStreamId(); m_appliedStreamIds.push_back(featureStreamId); if (m_appliedStreamIds.size() != 1) { @@ -103,8 +103,6 @@ void CropTransformer::Initialize(TransformerPtr next, void CropTransformer::InitFromConfig(const ConfigParameters &config) { - m_cropType = ParseCropType(config(L"cropType", "")); - floatargvector cropRatio = config(L"cropRatio", "1.0"); m_cropRatioMin = cropRatio[0]; m_cropRatioMax = cropRatio[1]; @@ -121,7 +119,7 @@ void CropTransformer::InitFromConfig(const ConfigParameters &config) if (!config.ExistsCurrent(L"hflip")) { - m_hFlip = m_cropType == CropType::Random; + m_hFlip = m_imageConfig->GetCropType() == CropType::Random; } else { @@ -166,9 +164,9 @@ void CropTransformer::Apply(size_t id, cv::Mat &mat) RuntimeError("Jitter type currently not implemented."); } - int viewIndex = m_cropType == CropType::MultiView10 ? (int)(id % 10) : 0; + int viewIndex = m_imageConfig->IsMultiViewCrop() ? 
(int)(id % 10) : 0; - mat = mat(GetCropRect(m_cropType, viewIndex, mat.rows, mat.cols, ratio, *rng)); + mat = mat(GetCropRect(m_imageConfig->GetCropType(), viewIndex, mat.rows, mat.cols, ratio, *rng)); if ((m_hFlip && std::bernoulli_distribution()(*rng)) || viewIndex >= 5) { @@ -178,29 +176,7 @@ void CropTransformer::Apply(size_t id, cv::Mat &mat) m_rngs.push(std::move(rng)); } -CropTransformer::CropType -CropTransformer::ParseCropType(const std::string &src) -{ - if (src.empty() || AreEqualIgnoreCase(src, "center")) - { - return CropType::Center; - } - - if (AreEqualIgnoreCase(src, "random")) - { - return CropType::Random; - } - - if (AreEqualIgnoreCase(src, "multiview10")) - { - return CropType::MultiView10; - } - - RuntimeError("Invalid crop type: %s.", src.c_str()); -} - -CropTransformer::RatioJitterType -CropTransformer::ParseJitterType(const std::string &src) +CropTransformer::RatioJitterType CropTransformer::ParseJitterType(const std::string &src) { if (src.empty() || AreEqualIgnoreCase(src, "none")) { diff --git a/Source/Readers/ImageReader/ImageTransformers.h b/Source/Readers/ImageReader/ImageTransformers.h index aa7cde648..a05af85b0 100644 --- a/Source/Readers/ImageReader/ImageTransformers.h +++ b/Source/Readers/ImageReader/ImageTransformers.h @@ -13,6 +13,7 @@ #include "ConcStack.h" #include "TransformerBase.h" #include "Config.h" +#include "ImageConfigHelper.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -55,6 +56,9 @@ protected: // The only function that should be redefined by the inherited classes. virtual void Apply(size_t id, cv::Mat &from) = 0; +protected: + std::unique_ptr m_imageConfig; + private: std::vector m_outputStreams; std::vector m_appliedStreamIds; @@ -72,12 +76,6 @@ private: void Apply(size_t id, cv::Mat &mat) override; private: - enum class CropType - { - Center = 0, - Random = 1, - MultiView10 = 2 - }; enum class RatioJitterType { None = 0, @@ -90,12 +88,10 @@ private: void StartEpoch(const EpochConfiguration &config) override; - CropType ParseCropType(const std::string &src); RatioJitterType ParseJitterType(const std::string &src); cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, std::mt19937 &rng); conc_stack> m_rngs; - CropType m_cropType; double m_cropRatioMin; double m_cropRatioMax; RatioJitterType m_jitterType; From 04041e98d607fc08c382f86fcce7e568c1413f8d Mon Sep 17 00:00:00 2001 From: thhoens Date: Thu, 5 May 2016 16:39:16 -0700 Subject: [PATCH 04/51] Fixed a memory leak in GPUMatrix --- Source/Math/CPUMatrix.cpp | 5 +++-- Source/Math/GPUMatrix.cu | 44 ++++++++++++++++++++------------------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/Source/Math/CPUMatrix.cpp b/Source/Math/CPUMatrix.cpp index f045a3c9d..dd004a207 100644 --- a/Source/Math/CPUMatrix.cpp +++ b/Source/Math/CPUMatrix.cpp @@ -1411,8 +1411,9 @@ void CPUMatrix::Resize(const size_t numRows, const size_t numCols, boo } // success - m_numRows = numRows; - m_numCols = numCols; + m_sliceViewOffset = 0; + m_numRows = numRows; + m_numCols = numCols; } // allocated by the callee but should be deleted by the caller diff --git a/Source/Math/GPUMatrix.cu b/Source/Math/GPUMatrix.cu index 3d030e6c2..dee6e27a3 100644 --- a/Source/Math/GPUMatrix.cu +++ b/Source/Math/GPUMatrix.cu @@ -1467,29 +1467,31 @@ void GPUMatrix::Resize(const size_t numRows, const size_t numCols, boo if (GetNumRows() == numRows && GetNumCols() == numCols) return; + size_t numElements = numRows * numCols; + if (numElements > GetSizeAllocated() || // grow 
allocation + (!growOnly && numElements != GetSizeAllocated())) // shrink allocation if not growOnly + { + // reallocate buffer if numElements > 0 + ElemType* pArray = nullptr; + if (numElements > 0) + pArray = TracingGPUMemoryAllocator::Allocate(GetComputeDeviceId(), numRows, numCols); + + // If the buffer exists, free it + if (Buffer()) + TracingGPUMemoryAllocator::Free(GetComputeDeviceId(), Buffer()); + + SetBuffer(pArray, numElements * sizeof(ElemType)); + SetSizeAllocated(numElements); + } + +#ifdef _DEBUG + CUDA_CALL(cudaMemset(Buffer(), 0xff, sizeof(ElemType) * GetSizeAllocated())); +#endif + + // success + m_sliceViewOffset = 0; m_numRows = numRows; m_numCols = numCols; - - size_t numElements = GetNumElements(); - if (numElements > GetSizeAllocated() || (!growOnly && numElements != GetSizeAllocated())) - { - if (IsEmpty()) - { - SetSizeAllocated(0); - SetBuffer(nullptr, 0); - } - else - { - if (Buffer()) - { - TracingGPUMemoryAllocator::Free(GetComputeDeviceId(), Buffer()); - } - SetSizeAllocated(numElements); - SetBuffer(TracingGPUMemoryAllocator::Allocate(GetComputeDeviceId(), m_numRows, m_numCols), numElements * sizeof(ElemType)); - CUDA_CALL(cudaMemset(Buffer(), 0, sizeof(ElemType) * GetSizeAllocated())); - } - } - m_sliceViewOffset = 0; } template From 7816b5ffc4b35d2e3b4a3eb4bfd8969cc3c11567 Mon Sep 17 00:00:00 2001 From: thhoens Date: Thu, 5 May 2016 16:40:30 -0700 Subject: [PATCH 05/51] Fixed transferFromDeviceToDevice --- Source/Math/CPUSparseMatrix.cpp | 1 + Source/Math/Matrix.cpp | 58 ++++++++++++++++++++++----------- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/Source/Math/CPUSparseMatrix.cpp b/Source/Math/CPUSparseMatrix.cpp index 4a3a213e6..ad18b1813 100644 --- a/Source/Math/CPUSparseMatrix.cpp +++ b/Source/Math/CPUSparseMatrix.cpp @@ -1384,6 +1384,7 @@ template void CPUSparseMatrix::SetValue(CPUSparseMatrix const&); template char* CPUSparseMatrix::Data() const; template char* CPUSparseMatrix::Data(); template void CPUSparseMatrix::Reset(void); +template void CPUSparseMatrix::Resize(const size_t, const size_t, const size_t, const bool); template void CPUSparseMatrix::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, bool); template void CPUSparseMatrix::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const MatrixFormat, const bool, bool); template CPUSparseMatrix::~CPUSparseMatrix(); diff --git a/Source/Math/Matrix.cpp b/Source/Math/Matrix.cpp index 42a1bf16d..2409394f8 100644 --- a/Source/Math/Matrix.cpp +++ b/Source/Math/Matrix.cpp @@ -3610,13 +3610,22 @@ void Matrix::_transferFromDeviceToDevice(int from_id, int to_id, bool if (!m_CPUSparseMatrix) LogicError("Can't move from CPU because I'm not there!"); - if (!m_GPUSparseMatrix) - m_GPUSparseMatrix = make_shared>(to_id, m_CPUSparseMatrix->GetFormat()); - else - m_GPUSparseMatrix->ChangeDeviceTo(to_id); - - if (m_CPUSparseMatrix->GetNumElements() != 0 && !emptyTransfer) + if (emptyTransfer) { + if (m_GPUSparseMatrix) + { + m_GPUSparseMatrix->ChangeDeviceTo(to_id); + m_GPUSparseMatrix->Resize(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount()); + } + else + m_GPUSparseMatrix = make_shared>(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount(), to_id, m_CPUSparseMatrix->GetFormat()); + } + else + { + if (!m_GPUSparseMatrix) + m_GPUSparseMatrix = make_shared>(to_id); + else + m_GPUSparseMatrix->ChangeDeviceTo(to_id); m_GPUSparseMatrix->SetValue(*m_CPUSparseMatrix); } @@ 
-3640,10 +3649,10 @@ void Matrix::_transferFromDeviceToDevice(int from_id, int to_id, bool if (!m_CPUSparseMatrix) m_CPUSparseMatrix = make_shared>(m_GPUSparseMatrix->GetFormat()); - if (m_GPUSparseMatrix->GetNumElements() != 0 && !emptyTransfer) - { + if (emptyTransfer) + m_CPUSparseMatrix->Resize(m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(), m_GPUSparseMatrix->NzCount(), true); + else m_GPUSparseMatrix->CopyToCPUSparseMatrix(*m_CPUSparseMatrix); - } if (isBeingMoved) { @@ -3668,13 +3677,19 @@ void Matrix::_transferFromDeviceToDevice(int from_id, int to_id, bool { if (!m_CPUMatrix) LogicError("Can't move from CPU because I'm not there!"); - if (m_CPUMatrix->GetNumElements() != 0 && !emptyTransfer) + if (emptyTransfer) { - m_GPUMatrix = make_shared>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Buffer(), matrixFlagNormal); + if (m_GPUMatrix) + m_GPUMatrix->Resize(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols()); + else + m_GPUMatrix = make_shared>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id); } else { - m_GPUMatrix = make_shared>(to_id); + if (m_GPUMatrix) + m_GPUMatrix->SetValue(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data()); + else + m_GPUMatrix = make_shared>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data()); } if (isBeingMoved) { @@ -3682,9 +3697,7 @@ void Matrix::_transferFromDeviceToDevice(int from_id, int to_id, bool m_CPUMatrix = nullptr; } else - { SetDataLocation(BOTH, DENSE); - } } else // from GPU { @@ -3693,15 +3706,22 @@ void Matrix::_transferFromDeviceToDevice(int from_id, int to_id, bool if (to_id < 0) // to CPU { - if (m_GPUMatrix->GetNumElements() != 0 && !emptyTransfer) + if (emptyTransfer) { - ElemType* arr = m_GPUMatrix->CopyToArray(); // TODO: unnecessary allocation/copy; why not make this a vector that we move over as an rvalue ref? - m_CPUMatrix = make_shared>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr, matrixFlagNormal); - delete[] arr; + if (m_CPUMatrix) + m_CPUMatrix->Resize(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols()); + else + m_CPUMatrix = make_shared>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols()); } else { - m_CPUMatrix = make_shared>(); + ElemType* arr = m_GPUMatrix->CopyToArray(); // TODO: unnecessary allocation/copy; why not make this a vector that we move over as an rvalue ref? + if (m_CPUMatrix) + m_CPUMatrix->SetValue(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr); + else + m_CPUMatrix = make_shared>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr, matrixFlagNormal); + + delete[] arr; } if (isBeingMoved) From b88cdc3b4714f77b2e863dfac663fb0214252f12 Mon Sep 17 00:00:00 2001 From: thhoens Date: Fri, 6 May 2016 16:18:16 -0700 Subject: [PATCH 06/51] Fixes related to comments from the CR. --- Source/Math/CPUMatrix.cpp | 1 - Source/Math/GPUMatrix.cu | 4 ---- Source/Math/Matrix.cpp | 13 ++++--------- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/Source/Math/CPUMatrix.cpp b/Source/Math/CPUMatrix.cpp index dd004a207..32c19e6aa 100644 --- a/Source/Math/CPUMatrix.cpp +++ b/Source/Math/CPUMatrix.cpp @@ -1382,7 +1382,6 @@ void CPUMatrix::RequireSize(const size_t numRows, const size_t numCols // Resize() -- change matrix size // This function is cheap if the matrix size does not change. // Current content is not preserved. -// BUGBUG: There is code that relies on zero initialization (without, we get subtle variations of output). 
That is wrong--we should initialize to QNaN and see where it fails. // If growOnly is true, resize will not reallocate memory if the current memory is large enough (i.e., will not shrink). // If this object does not own its memory then new memory cannot be allocated (one can still shrink and/or reshape). template diff --git a/Source/Math/GPUMatrix.cu b/Source/Math/GPUMatrix.cu index dee6e27a3..a2aa9fcbb 100644 --- a/Source/Math/GPUMatrix.cu +++ b/Source/Math/GPUMatrix.cu @@ -1484,10 +1484,6 @@ void GPUMatrix::Resize(const size_t numRows, const size_t numCols, boo SetSizeAllocated(numElements); } -#ifdef _DEBUG - CUDA_CALL(cudaMemset(Buffer(), 0xff, sizeof(ElemType) * GetSizeAllocated())); -#endif - // success m_sliceViewOffset = 0; m_numRows = numRows; diff --git a/Source/Math/Matrix.cpp b/Source/Math/Matrix.cpp index 2409394f8..2ab162da3 100644 --- a/Source/Math/Matrix.cpp +++ b/Source/Math/Matrix.cpp @@ -3612,20 +3612,15 @@ void Matrix::_transferFromDeviceToDevice(int from_id, int to_id, bool if (emptyTransfer) { - if (m_GPUSparseMatrix) - { - m_GPUSparseMatrix->ChangeDeviceTo(to_id); + if (m_GPUSparseMatrix && m_GPUSparseMatrix->GetDeviceId() == to_id)) m_GPUSparseMatrix->Resize(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount()); - } else m_GPUSparseMatrix = make_shared>(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount(), to_id, m_CPUSparseMatrix->GetFormat()); } else { - if (!m_GPUSparseMatrix) + if (!m_GPUSparseMatrix || m_GPUSparseMatrix->GetDeviceId() != to_id) m_GPUSparseMatrix = make_shared>(to_id); - else - m_GPUSparseMatrix->ChangeDeviceTo(to_id); m_GPUSparseMatrix->SetValue(*m_CPUSparseMatrix); } @@ -3679,14 +3674,14 @@ void Matrix::_transferFromDeviceToDevice(int from_id, int to_id, bool LogicError("Can't move from CPU because I'm not there!"); if (emptyTransfer) { - if (m_GPUMatrix) + if (m_GPUMatrix && m_GPUMatrix->GetDeviceId() == to_id) m_GPUMatrix->Resize(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols()); else m_GPUMatrix = make_shared>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id); } else { - if (m_GPUMatrix) + if (m_GPUMatrix && m_GPUMatrix->GetDeviceId() == to_id) m_GPUMatrix->SetValue(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data()); else m_GPUMatrix = make_shared>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data()); From 1d05742d4a3ed0a03ae10184b48b0f4dc3c705a3 Mon Sep 17 00:00:00 2001 From: thhoens Date: Fri, 6 May 2016 16:42:08 -0700 Subject: [PATCH 07/51] Fixed GetDeviceId -> GetComputeDeviceId --- Source/Math/Matrix.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Math/Matrix.cpp b/Source/Math/Matrix.cpp index 2ab162da3..b73f94659 100644 --- a/Source/Math/Matrix.cpp +++ b/Source/Math/Matrix.cpp @@ -3612,14 +3612,14 @@ void Matrix::_transferFromDeviceToDevice(int from_id, int to_id, bool if (emptyTransfer) { - if (m_GPUSparseMatrix && m_GPUSparseMatrix->GetDeviceId() == to_id)) + if (m_GPUSparseMatrix && m_GPUSparseMatrix->GetComputeDeviceId() == to_id) m_GPUSparseMatrix->Resize(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount()); else m_GPUSparseMatrix = make_shared>(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount(), to_id, m_CPUSparseMatrix->GetFormat()); } else { - if (!m_GPUSparseMatrix || m_GPUSparseMatrix->GetDeviceId() != to_id) + if (!m_GPUSparseMatrix || 
m_GPUSparseMatrix->GetComputeDeviceId() != to_id) m_GPUSparseMatrix = make_shared>(to_id); m_GPUSparseMatrix->SetValue(*m_CPUSparseMatrix); } @@ -3674,14 +3674,14 @@ void Matrix::_transferFromDeviceToDevice(int from_id, int to_id, bool LogicError("Can't move from CPU because I'm not there!"); if (emptyTransfer) { - if (m_GPUMatrix && m_GPUMatrix->GetDeviceId() == to_id) + if (m_GPUMatrix && m_GPUMatrix->GetComputeDeviceId() == to_id) m_GPUMatrix->Resize(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols()); else m_GPUMatrix = make_shared>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id); } else { - if (m_GPUMatrix && m_GPUMatrix->GetDeviceId() == to_id) + if (m_GPUMatrix && m_GPUMatrix->GetComputeDeviceId() == to_id) m_GPUMatrix->SetValue(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data()); else m_GPUMatrix = make_shared>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data()); From 5600f281ac60a6a0f2f02317a88eec5d877c0a79 Mon Sep 17 00:00:00 2001 From: Mark Hillebrand Date: Mon, 9 May 2016 17:28:34 +0200 Subject: [PATCH 08/51] TruncatedBpttPacker.cpp: limit m_numParallelSequences for each worker --- Source/Readers/ReaderLib/TruncatedBpttPacker.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Source/Readers/ReaderLib/TruncatedBpttPacker.cpp b/Source/Readers/ReaderLib/TruncatedBpttPacker.cpp index 5c76f3d79..26fb46a1b 100644 --- a/Source/Readers/ReaderLib/TruncatedBpttPacker.cpp +++ b/Source/Readers/ReaderLib/TruncatedBpttPacker.cpp @@ -148,6 +148,15 @@ void TruncatedBPTTPacker::StartEpoch(const EpochConfiguration& config) // Estimating the number of parallel sequences to pack (slots) from the minibatch size and truncation size. m_numParallelSequences = max(1, (int)floor(m_minibatchSize / m_truncationSize)); + if (config.m_numberOfWorkers > m_numParallelSequences) + { + InvalidArgument("Too many workers for minibatch size; please increase minibatch size or decrease number of workers."); + } + + m_numParallelSequences = + (m_numParallelSequences / config.m_numberOfWorkers) + + (config.m_workerRank < (m_numParallelSequences % config.m_numberOfWorkers) ? 1 : 0); + m_sequenceBufferPerStream.clear(); // Preparing the buffers. From 39e2d1bc9493d74fffb8d5b0e9ab73fd49f00867 Mon Sep 17 00:00:00 2001 From: Mark Hillebrand Date: Mon, 9 May 2016 17:29:11 +0200 Subject: [PATCH 09/51] ExperimentalHTKMLFReader: add checkData option for filtering seq. 
of uneven length --- Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp b/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp index f2eb666fe..40e9b5f9f 100644 --- a/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp +++ b/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp @@ -105,7 +105,8 @@ HTKMLFReader::HTKMLFReader(MemoryProviderPtr provider, LogicError("Please specify at least a single input stream."); } - auto bundler = std::make_shared(readerConfig, deserializers[0], deserializers, false); + bool cleanse = readerConfig(L"checkData", false); + auto bundler = std::make_shared(readerConfig, deserializers[0], deserializers, cleanse); int verbosity = readerConfig(L"verbosity", 2); std::wstring readMethod = config.GetRandomizer(); From 3b80f2047803fb0cd39a58487a2664808f0f04b6 Mon Sep 17 00:00:00 2001 From: jeanfad Date: Tue, 10 May 2016 14:54:40 +0200 Subject: [PATCH 10/51] consolidate global params --- contrib/Python/cntk/context.py | 54 ++++++++++++------- .../cntk/templates/cntk_eval_template.cntk | 7 +-- .../cntk_global_params_template.cntk | 4 ++ .../cntk/templates/cntk_test_template.cntk | 7 +-- .../cntk/templates/cntk_train_template.cntk | 6 +-- .../cntk/templates/cntk_write_template.cntk | 7 +-- contrib/Python/cntk/utils/eval.py | 30 ++++++----- .../cntk/{ => utils}/tests/eval_test.py | 0 8 files changed, 61 insertions(+), 54 deletions(-) create mode 100644 contrib/Python/cntk/templates/cntk_global_params_template.cntk rename contrib/Python/cntk/{ => utils}/tests/eval_test.py (100%) diff --git a/contrib/Python/cntk/context.py b/contrib/Python/cntk/context.py index 77379a0e0..b083e1702 100644 --- a/contrib/Python/cntk/context.py +++ b/contrib/Python/cntk/context.py @@ -15,7 +15,7 @@ import numpy as np import shutil as sh from cntk.graph import ComputationNode -from cntk.utils import get_cntk_cmd, MODEL_INDENTATION +from cntk.utils import get_cntk_cmd from .utils import cntk_to_numpy_shape from .utils import with_metaclass from .reader import InputMap @@ -205,6 +205,22 @@ class AbstractContext(with_metaclass(ABCMeta, object)): return description, inputs + def _generate_global_params(self, **kw): + ''' + Generates key value global parameters for a CNTK configuration file. + + Args: + kw (dict): dictionary of key values. 
e.g., modelPath="my/path/model" + + Returns: + configuration string + ''' + + config = [] + for k,w in kw.items(): + config.append('{0}={1}'.format(k, w)) + return '\n'.join(config) + def _generate_train_config(self, root_nodes, training_params, input_map, override_existing, action_name=None): ''' @@ -227,18 +243,17 @@ class AbstractContext(with_metaclass(ABCMeta, object)): description, inputs = self._generate_config(root_nodes, input_map) tmpl = open(CNTK_TRAIN_TEMPLATE_PATH, "r").read() - + g_params = self._generate_global_params(DevideId=self.device_id, + Precision='"{0}"'.format(self.precision), + ModelPath='"{0}"'.format(self.model_path)) tmpl_dict = { 'ActionName': action_name, - 'DevideId': self.device_id, - 'Precision': self.precision, - 'ModelDescription': description, - 'ModelPath': self.model_path, + 'ModelDescription': description, 'Reader': input_map._to_config_description(), 'SGD': training_params._to_config_description(), } - return tmpl % tmpl_dict + return "{0}\n{1}".format(g_params, tmpl % tmpl_dict) def _generate_test_config(self, root_nodes, input_map=None, action_name=None): @@ -259,16 +274,17 @@ class AbstractContext(with_metaclass(ABCMeta, object)): # we generate the config just to collect the lazy readers in input_map self._generate_config(root_nodes, input_map) + g_params = self._generate_global_params(DevideId=self.device_id, + Precision='"{0}"'.format(self.precision), + ModelPath='"{0}"'.format(self.model_path)) + tmpl = open(CNTK_TEST_TEMPLATE_PATH, "r").read() tmpl_dict = { 'ActionName': action_name, - 'DevideId': self.device_id, - 'Precision': self.precision, - 'ModelPath': self.model_path, 'Reader': input_map._to_config_description(), } - return tmpl % tmpl_dict + return "{0}\n{1}".format(g_params, tmpl % tmpl_dict) def _generate_write_config(self, input_map, action_name=None): ''' @@ -285,17 +301,18 @@ class AbstractContext(with_metaclass(ABCMeta, object)): if input_map is None: input_map = InputMap() + g_params = self._generate_global_params(DevideId=self.device_id, + Precision='"{0}"'.format(self.precision), + ModelPath='"{0}"'.format(self.model_path)) + tmpl = open(CNTK_WRITE_TEMPLATE_PATH, "r").read() tmpl_dict = { 'ActionName': action_name, - 'DevideId': self.device_id, - 'Precision': self.precision, - 'ModelPath': self.model_path, 'OutputFile': self.output_filename_base, 'Reader': input_map._to_config_description(), } - return tmpl % tmpl_dict + return "{0}\n{1}".format(g_params, tmpl % tmpl_dict) def _generate_eval_config(self, root_nodes, input_map=None, node_unit_test=False, action_name=None): @@ -328,18 +345,19 @@ class AbstractContext(with_metaclass(ABCMeta, object)): desc, _inputs = dummy_input._to_config_description(input_map) description += '\n\n' + desc + g_params = self._generate_global_params(DevideId=self.device_id, + Precision='"{0}"'.format(self.precision)) + tmpl = open(CNTK_EVAL_TEMPLATE_PATH, "r").read() tmpl_dict = { 'ActionName': action_name, - 'DevideId': self.device_id, - 'Precision': self.precision, 'NodeUnitTest': node_unit_test, 'OutputFile': self.output_filename_base, 'ModelDescription': description, 'Reader': input_map._to_config_description(), } - return tmpl % tmpl_dict + return "{0}\n{1}".format(g_params, tmpl % tmpl_dict) class LocalExecutionContext(AbstractContext): diff --git a/contrib/Python/cntk/templates/cntk_eval_template.cntk b/contrib/Python/cntk/templates/cntk_eval_template.cntk index a2b5f080b..eb718087f 100644 --- a/contrib/Python/cntk/templates/cntk_eval_template.cntk +++ 
b/contrib/Python/cntk/templates/cntk_eval_template.cntk @@ -1,10 +1,7 @@ - -deviceId=%(DevideId)s -precision="%(Precision)s" - %(ActionName)s=[ action="write" - nodeUnitTest=%(NodeUnitTest)s + + nodeUnitTest=%(NodeUnitTest)s run=BrainScriptNetworkBuilder BrainScriptNetworkBuilder=[ diff --git a/contrib/Python/cntk/templates/cntk_global_params_template.cntk b/contrib/Python/cntk/templates/cntk_global_params_template.cntk new file mode 100644 index 000000000..e2662dc03 --- /dev/null +++ b/contrib/Python/cntk/templates/cntk_global_params_template.cntk @@ -0,0 +1,4 @@ +modelPath="%(ModelPath)s" +deviceId=%(DevideId)s +precision="%(Precision)s" + diff --git a/contrib/Python/cntk/templates/cntk_test_template.cntk b/contrib/Python/cntk/templates/cntk_test_template.cntk index 64b518287..e2ba2d952 100644 --- a/contrib/Python/cntk/templates/cntk_test_template.cntk +++ b/contrib/Python/cntk/templates/cntk_test_template.cntk @@ -1,11 +1,6 @@ - -modelPath="%(ModelPath)s" -deviceId=%(DevideId)s -precision="%(Precision)s" - %(ActionName)s=[ action="test" - + %(Reader)s ] diff --git a/contrib/Python/cntk/templates/cntk_train_template.cntk b/contrib/Python/cntk/templates/cntk_train_template.cntk index b6741df87..aff0aba17 100644 --- a/contrib/Python/cntk/templates/cntk_train_template.cntk +++ b/contrib/Python/cntk/templates/cntk_train_template.cntk @@ -1,10 +1,6 @@ - -modelPath="%(ModelPath)s" -deviceId=%(DevideId)s -precision="%(Precision)s" - %(ActionName)s=[ action="train" + run=BrainScriptNetworkBuilder BrainScriptNetworkBuilder=[ diff --git a/contrib/Python/cntk/templates/cntk_write_template.cntk b/contrib/Python/cntk/templates/cntk_write_template.cntk index 1ef30491e..baaa027cf 100644 --- a/contrib/Python/cntk/templates/cntk_write_template.cntk +++ b/contrib/Python/cntk/templates/cntk_write_template.cntk @@ -1,10 +1,5 @@ - -modelPath="%(ModelPath)s" -deviceId=%(DevideId)s -precision="%(Precision)s" - %(ActionName)s=[ - action="write" + action="write" %(Reader)s diff --git a/contrib/Python/cntk/utils/eval.py b/contrib/Python/cntk/utils/eval.py index aa952797c..e027e7a7d 100644 --- a/contrib/Python/cntk/utils/eval.py +++ b/contrib/Python/cntk/utils/eval.py @@ -27,7 +27,8 @@ def eval(node): from cntk.context import get_context from cntk.ops import input_numpy, constant - + from cntk.graph import ComputationNode + # call a helper method to get a context ctx = get_context() first = True @@ -38,18 +39,19 @@ def eval(node): for p in node.params: if p in node.inputs: val = getattr(node, p) - # One param needs to be an Input() node. This will being fixed in - # CNTK soon, so that we can remove this workaround and evaluate a - # network with no inputs. - if first: - if not isinstance(val, list): - # inputs have the outmost dimension for sequences - val = [val] - - ir = input_numpy([val], alias=p, name=p) - setattr(node, p, ir) - first = False - else: - setattr(node, p, constant(getattr(node, p), name=p)) + if not isinstance(val, ComputationNode): + # One param needs to be an Input() node. This will being fixed in + # CNTK soon, so that we can remove this workaround and evaluate a + # network with no inputs. 
+ if first: + if not isinstance(val, list): + # inputs have the outmost dimension for sequences + val = [val] + + ir = input_numpy([val], alias=p, name=p) + setattr(node, p, ir) + first = False + else: + setattr(node, p, constant(getattr(node, p), name=p)) return ctx.eval(node) diff --git a/contrib/Python/cntk/tests/eval_test.py b/contrib/Python/cntk/utils/tests/eval_test.py similarity index 100% rename from contrib/Python/cntk/tests/eval_test.py rename to contrib/Python/cntk/utils/tests/eval_test.py From 037f1505549882c2eb96f71107a42aed565ba6d8 Mon Sep 17 00:00:00 2001 From: jeanfad Date: Tue, 10 May 2016 15:32:03 +0200 Subject: [PATCH 11/51] add more unit tests for eval --- contrib/Python/cntk/utils/eval.py | 5 ++- contrib/Python/cntk/utils/tests/eval_test.py | 34 ++++++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/contrib/Python/cntk/utils/eval.py b/contrib/Python/cntk/utils/eval.py index e027e7a7d..2c8939542 100644 --- a/contrib/Python/cntk/utils/eval.py +++ b/contrib/Python/cntk/utils/eval.py @@ -53,5 +53,8 @@ def eval(node): first = False else: setattr(node, p, constant(getattr(node, p), name=p)) - + else: + if val.op_name == 'CNTK2.Input' and first: + first = False + return ctx.eval(node) diff --git a/contrib/Python/cntk/utils/tests/eval_test.py b/contrib/Python/cntk/utils/tests/eval_test.py index b05269703..af6288397 100644 --- a/contrib/Python/cntk/utils/tests/eval_test.py +++ b/contrib/Python/cntk/utils/tests/eval_test.py @@ -12,11 +12,41 @@ operations import numpy as np import cntk -from ..ops import plus import pytest def test_eval_plus(): - result = cntk.eval(plus([1., 2., 3., 4.], [1., 1., 0., 0.])) + result = cntk.eval(cntk.plus([1., 2., 3., 4.], [1., 1., 0., 0.])) TOLERANCE_ABSOLUTE = 1E-06 assert np.allclose(result, np.asarray([2., 3., 3., 4.]), atol=TOLERANCE_ABSOLUTE) + +def test_eval_plus_one_input(): + result = cntk.eval(cntk.plus(cntk.input_numpy([[1., 2., 3., 4.]]), [1., 1., 0., 0.])) + TOLERANCE_ABSOLUTE = 1E-06 + assert np.allclose(result, np.asarray([2., 3., 3., 4.]), atol=TOLERANCE_ABSOLUTE) + +def test_eval_plus_one_input_last(): + result = cntk.eval(cntk.plus([1., 2., 3., 4.], cntk.input_numpy([[1., 1., 0., 0.]]))) + TOLERANCE_ABSOLUTE = 1E-06 + assert np.allclose(result, np.asarray([2., 3., 3., 4.]), atol=TOLERANCE_ABSOLUTE) + +def test_eval_plus_two_inputs(): + result = cntk.eval(cntk.plus(cntk.input_numpy([[1., 2., 3., 4.]]), cntk.input_numpy([[1., 1., 0., 0.]]))) + TOLERANCE_ABSOLUTE = 1E-06 + assert np.allclose(result, np.asarray([2., 3., 3., 4.]), atol=TOLERANCE_ABSOLUTE) + +def test_eval_plus_one_constant(): + result = cntk.eval(cntk.plus(cntk.constant([1., 2., 3., 4.]), [1., 1., 0., 0.])) + TOLERANCE_ABSOLUTE = 1E-06 + assert np.allclose(result, np.asarray([2., 3., 3., 4.]), atol=TOLERANCE_ABSOLUTE) + +def test_eval_plus_one_constant_last(): + result = cntk.eval(cntk.plus([1., 2., 3., 4.], cntk.constant([1., 1., 0., 0.]))) + TOLERANCE_ABSOLUTE = 1E-06 + assert np.allclose(result, np.asarray([2., 3., 3., 4.]), atol=TOLERANCE_ABSOLUTE) + +# this is dis-activated for now because we cannot have a netowrk without inputs +def _test_eval_plus_two_constants(): + result = cntk.eval(cntk.plus(cntk.constant([1., 2., 3., 4.]), cntk.constant([1., 1., 0., 0.]))) + TOLERANCE_ABSOLUTE = 1E-06 + assert np.allclose(result, np.asarray([2., 3., 3., 4.]), atol=TOLERANCE_ABSOLUTE) \ No newline at end of file From 60daf7fe8bede2b5cd9edc327775e56392575d9a Mon Sep 17 00:00:00 2001 From: jeanfad Date: Tue, 10 May 2016 15:34:19 +0200 Subject: 
[PATCH 12/51] guard against null values in test --- contrib/Python/cntk/context.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/Python/cntk/context.py b/contrib/Python/cntk/context.py index b083e1702..e58e4a0d4 100644 --- a/contrib/Python/cntk/context.py +++ b/contrib/Python/cntk/context.py @@ -683,6 +683,10 @@ class LocalExecutionContext(AbstractContext): dictionary containing `SamplesSeen`, `Perplexity`, and values for objective and evaluation error indexed by their node names ''' + + if root_nodes is None and input_map is None: + raise ValueError('If input_map is None, you have to specify root_nodes.') + action_name = "Test" config_content = self._generate_test_config(root_nodes, input_map, action_name = action_name) From 8582774421dac0994d6176b4c5a0946aa8b96617 Mon Sep 17 00:00:00 2001 From: jeanfad Date: Tue, 10 May 2016 19:12:23 +0200 Subject: [PATCH 13/51] add sgd parallel params --- contrib/Python/cntk/sgd.py | 76 +++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/contrib/Python/cntk/sgd.py b/contrib/Python/cntk/sgd.py index 50fac5747..51faa3438 100644 --- a/contrib/Python/cntk/sgd.py +++ b/contrib/Python/cntk/sgd.py @@ -3,6 +3,8 @@ # for full license information. # ============================================================================== + + class SGDParams: """ This class encapsulates the training parameters of Stochastic Gradien @@ -260,6 +262,76 @@ class SGDParams: self.trace_node_names_category = trace_node_names_category self.trace_node_names_sparse = trace_node_names_sparse self.gradient_check = gradient_check + self.parallel_training_ = None + + def _set_global_parallel_params(self, + parallalization_method = 'none', + parallelization_start_epoch = 0, + distributed_mb_reading = False, + sync_perf_stats = 0): + self.parallel_training = { + 'parallelizationMethod':parallalization_method, + 'parallelizationStartEpoch':parallelization_start_epoch, + 'distributedMBReading':distributed_mb_reading, + 'syncPerfStats':sync_perf_stats} + + def set_parallel_to_data_parallel(self, + parallelization_start_epoch = 0, + distributed_mb_reading = False, + sync_perf_stats = 0, + gradient_bits = 8, + use_zero_threshold_for_1bit = True, + use_buffered_async_gradient_aggregation = False): + + self._set_global_parallel_params('DataParallelSGD', + parallelization_start_epoch, + distributed_mb_reading, + sync_perf_stats) + + self.parallel_training_subblock = { + 'gradientBits':gradient_bits, + 'useZeroThresholdFor1BitQuantization':use_zero_threshold_for_1bit, + 'useBufferedAsyncGradientAggregation':use_buffered_async_gradient_aggregation} + + def set_parallel_to_model_average(self, + parallelization_start_epoch = 0, + distributed_mb_reading = False, + sync_perf_stats = 0, + sync_period = 40000, + sync_frequency_in_frames = None): + + self._set_global_parallel_params('ModelAveragingSGD', + parallelization_start_epoch, + distributed_mb_reading, + sync_perf_stats) + + self.parallel_training_subblock = { + 'syncPeriod':sync_period, + 'syncFrequencyInFrames':sync_frequency_in_frames} + + def set_parallel_to_block_momentum(self, + parallelization_start_epoch = 0, + distributed_mb_reading = False, + sync_perf_stats = 0, + sync_period = 120000, + reset_sgd_momentum = True, + use_nesterov_momentum = True, + block_learning_rate = 1.0, + block_momentum_per_sync = None, + block_momentum_as_time_constant = None): + + self._set_global_parallel_params('BlockMomentumSGD', + parallelization_start_epoch, + distributed_mb_reading, + 
sync_perf_stats) + + self.parallel_training_subblock = { + 'syncPeriod':sync_period, + 'resetSGDMomentum':reset_sgd_momentum, + 'useNesterovMomentum':use_nesterov_momentum, + 'blockLearningRate':block_learning_rate, + 'blockMomentumPerSync':block_momentum_per_sync, + 'blockMomentumAsTimeConstant':block_momentum_as_time_constant} def _to_config_description(self): """Generate the SGDParams configuration block @@ -268,7 +340,9 @@ class SGDParams: auto_adjust_block = [] for k, v in self.__dict__.items(): if k[0] != '_' and v is not None: - # this is a sub-block + # this is a sub-block. + #TODO: perhaps move this to a separete method (set_auto_adjust), + # but then the user would need to call it explicitly if k in self._auto_adjust_params: auto_adjust_block.append('\t{0} = {1}\n'.format(self._py_to_cntk[k], v)) else: From 744db6c37fe1c4c77df2447ddd6c15c80c74b414 Mon Sep 17 00:00:00 2001 From: jeanfad Date: Tue, 10 May 2016 19:21:30 +0200 Subject: [PATCH 14/51] add config generation for parallel sgd --- contrib/Python/cntk/sgd.py | 56 ++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/contrib/Python/cntk/sgd.py b/contrib/Python/cntk/sgd.py index 51faa3438..2714e3c61 100644 --- a/contrib/Python/cntk/sgd.py +++ b/contrib/Python/cntk/sgd.py @@ -265,10 +265,10 @@ class SGDParams: self.parallel_training_ = None def _set_global_parallel_params(self, - parallalization_method = 'none', - parallelization_start_epoch = 0, - distributed_mb_reading = False, - sync_perf_stats = 0): + parallalization_method = None, + parallelization_start_epoch = None, + distributed_mb_reading = None, + sync_perf_stats = None): self.parallel_training = { 'parallelizationMethod':parallalization_method, 'parallelizationStartEpoch':parallelization_start_epoch, @@ -276,12 +276,12 @@ class SGDParams: 'syncPerfStats':sync_perf_stats} def set_parallel_to_data_parallel(self, - parallelization_start_epoch = 0, - distributed_mb_reading = False, - sync_perf_stats = 0, - gradient_bits = 8, - use_zero_threshold_for_1bit = True, - use_buffered_async_gradient_aggregation = False): + parallelization_start_epoch = None, + distributed_mb_reading = None, + sync_perf_stats = None, + gradient_bits = None, + use_zero_threshold_for_1bit = None, + use_buffered_async_gradient_aggregation = None): self._set_global_parallel_params('DataParallelSGD', parallelization_start_epoch, @@ -294,10 +294,10 @@ class SGDParams: 'useBufferedAsyncGradientAggregation':use_buffered_async_gradient_aggregation} def set_parallel_to_model_average(self, - parallelization_start_epoch = 0, - distributed_mb_reading = False, - sync_perf_stats = 0, - sync_period = 40000, + parallelization_start_epoch = None, + distributed_mb_reading = None, + sync_perf_stats = None, + sync_period = None, sync_frequency_in_frames = None): self._set_global_parallel_params('ModelAveragingSGD', @@ -310,13 +310,13 @@ class SGDParams: 'syncFrequencyInFrames':sync_frequency_in_frames} def set_parallel_to_block_momentum(self, - parallelization_start_epoch = 0, - distributed_mb_reading = False, - sync_perf_stats = 0, - sync_period = 120000, - reset_sgd_momentum = True, - use_nesterov_momentum = True, - block_learning_rate = 1.0, + parallelization_start_epoch = None, + distributed_mb_reading = None, + sync_perf_stats = None, + sync_period = None, + reset_sgd_momentum = None, + use_nesterov_momentum = None, + block_learning_rate = None, block_momentum_per_sync = None, block_momentum_as_time_constant = None): @@ -333,6 +333,20 @@ class SGDParams: 
'blockMomentumPerSync':block_momentum_per_sync, 'blockMomentumAsTimeConstant':block_momentum_as_time_constant} + + def _generate_parallel_training_config(self): + config = ['ParallelTrain=['] + for k,v in self.parallel_training.items(): + if v: + config.append('\t{0} = {1}'.format(k, v)) + + config.append('\t{0} = ['.format(self.parallel_training['parallelizationMethod'])) + for k,v in self.parallel_training_subblock.items(): + if v: + config.append('\t\t{0} = {1}'.format(k, v)) + config.append['\t]'] + config.append[']'] + def _to_config_description(self): """Generate the SGDParams configuration block """ From 78642b66a6ff50794a4e004d0c53a2a51b19fd59 Mon Sep 17 00:00:00 2001 From: jeanfad Date: Tue, 10 May 2016 19:22:33 +0200 Subject: [PATCH 15/51] add config generation for parallel sgd --- contrib/Python/cntk/sgd.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/contrib/Python/cntk/sgd.py b/contrib/Python/cntk/sgd.py index 2714e3c61..15d22e837 100644 --- a/contrib/Python/cntk/sgd.py +++ b/contrib/Python/cntk/sgd.py @@ -346,6 +346,7 @@ class SGDParams: config.append('\t\t{0} = {1}'.format(k, v)) config.append['\t]'] config.append[']'] + return '\n'.join(config) def _to_config_description(self): """Generate the SGDParams configuration block @@ -366,4 +367,8 @@ class SGDParams: config.append("autoAdjust=[\n") config.extend(auto_adjust_block) config.append("\t]") + + if self.parallel_training: + config.append(self._generate_parallel_training_config()) + return ''.join(config) From d7278ab348dd7290ebefc8b196dedee33c24cde9 Mon Sep 17 00:00:00 2001 From: jeanfad Date: Tue, 10 May 2016 19:24:01 +0200 Subject: [PATCH 16/51] fix typo in param name --- contrib/Python/cntk/sgd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/Python/cntk/sgd.py b/contrib/Python/cntk/sgd.py index 15d22e837..0e6baa080 100644 --- a/contrib/Python/cntk/sgd.py +++ b/contrib/Python/cntk/sgd.py @@ -262,7 +262,7 @@ class SGDParams: self.trace_node_names_category = trace_node_names_category self.trace_node_names_sparse = trace_node_names_sparse self.gradient_check = gradient_check - self.parallel_training_ = None + self.parallel_training = None def _set_global_parallel_params(self, parallalization_method = None, From 98a6b086d89735be3b46b900c12fd954c625013f Mon Sep 17 00:00:00 2001 From: jeanfad Date: Tue, 10 May 2016 19:35:38 +0200 Subject: [PATCH 17/51] add doc string for SGD --- contrib/Python/cntk/sgd.py | 51 +++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/contrib/Python/cntk/sgd.py b/contrib/Python/cntk/sgd.py index 0e6baa080..0db8fd5e9 100644 --- a/contrib/Python/cntk/sgd.py +++ b/contrib/Python/cntk/sgd.py @@ -283,6 +283,19 @@ class SGDParams: use_zero_threshold_for_1bit = None, use_buffered_async_gradient_aggregation = None): + """ + This function sets the parallel training to Data Paralllel SGD. + + Args: + parallelization_start_epoch: accepts integer value; default is 1 + distributed_mb_reading: accepts boolean value: True or False ; + default is False It is recommended to turn distributed minibatch + reading on to minimize the I/O cost in each worker. 
+ sync_perf_stats: accepts integer value; default is 0 + gradient_bits: the number of bits used to send gradient updates + use_zero_threshold_for_1bit: TBA + use_buffered_async_gradient_aggregation: TBA + """ self._set_global_parallel_params('DataParallelSGD', parallelization_start_epoch, distributed_mb_reading, @@ -299,7 +312,19 @@ class SGDParams: sync_perf_stats = None, sync_period = None, sync_frequency_in_frames = None): - + """ + This function sets the parallel training to Model Averaging SGD. + + Args: + parallelization_start_epoch : accepts integer value; default is 1 + distributed_mb_reading : accepts boolean value: True or False ; + default is False It is recommended to turn distributed minibatch + reading on to minimize the I/O cost in each worker. + sync_perf_stats: accepts integer value; default is 0 + sync_period: specifies the number of samples that each worker need + to process before a model averaging is conducted. The default value is 40,000. + sync_frequency_in_frames: TBA + """ self._set_global_parallel_params('ModelAveragingSGD', parallelization_start_epoch, distributed_mb_reading, @@ -319,7 +344,31 @@ class SGDParams: block_learning_rate = None, block_momentum_per_sync = None, block_momentum_as_time_constant = None): + """ + This function sets the parallel training to Block Momentum SGD. + + Args: + parallelization_start_epoch : accepts integer value; default is 1 + distributed_mb_reading : accepts boolean value: True or False ; + default is False It is recommended to turn distributed minibatch + reading on to minimize the I/O cost in each worker. + sync_perf_stats: accepts integer value; default is 0 + sync_period: it specifies how frequent a model synchronization is performed. + The default value is 120,000. + reset_sgd_momentum: This means after every synchronization point, + the smoothed gradient used in local SGD will be set as 0. The default + value of this variable is True. + use_nesterov_momentum: This means the Nestrov style block momentum + is applied. The default value of this variable is True. + block_learning_rate: specifies the block learning rate. + block_momentum_per_sync: TBA + block_momentum_as_time_constant: specifies the time constant of the + low-pass filter in block-level model update. It is calculated as: + blockMomentumAsTimeConstant = -syncPeriod / log(block_momentum). 
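[Editorial aside -- illustrative sketch, not part of PATCH 17. The docstring above relates sync_period and block_momentum_as_time_constant via blockMomentumAsTimeConstant = -syncPeriod / log(block_momentum); the standalone Python snippet below only evaluates that formula for hypothetical values so the magnitude of the time constant is easier to gauge.]

# Illustrative only: worked example of the docstring formula above.
# sync_period follows the docstring default; block_momentum is an assumed value.
from math import log

sync_period = 120000       # samples processed between synchronizations
block_momentum = 0.9       # hypothetical block-level momentum

time_constant = -sync_period / log(block_momentum)
print(round(time_constant))  # roughly 1.14e6 samples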
+ Note that block_momentum_per_sync and block_momentum_as_time_constant + are mutually exclusive + """ self._set_global_parallel_params('BlockMomentumSGD', parallelization_start_epoch, distributed_mb_reading, From fcc66631044923220bbb72497f1319f6879d69aa Mon Sep 17 00:00:00 2001 From: jeanfad Date: Tue, 10 May 2016 19:42:58 +0200 Subject: [PATCH 18/51] fix bug in config generation for sgd parallele training --- contrib/Python/cntk/sgd.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/Python/cntk/sgd.py b/contrib/Python/cntk/sgd.py index 0db8fd5e9..956c1222d 100644 --- a/contrib/Python/cntk/sgd.py +++ b/contrib/Python/cntk/sgd.py @@ -393,8 +393,8 @@ class SGDParams: for k,v in self.parallel_training_subblock.items(): if v: config.append('\t\t{0} = {1}'.format(k, v)) - config.append['\t]'] - config.append[']'] + config.append('\t]') + config.append(']') return '\n'.join(config) def _to_config_description(self): @@ -403,7 +403,7 @@ class SGDParams: config = [] auto_adjust_block = [] for k, v in self.__dict__.items(): - if k[0] != '_' and v is not None: + if not k.startswith('parallel_training') and k[0] != '_' and v is not None: # this is a sub-block. #TODO: perhaps move this to a separete method (set_auto_adjust), # but then the user would need to call it explicitly From fb91d0721b18a95631a2dd61992efa292490aacc Mon Sep 17 00:00:00 2001 From: jeanfad Date: Tue, 10 May 2016 19:47:27 +0200 Subject: [PATCH 19/51] remove no longer needed template file for global params --- .../Python/cntk/templates/cntk_global_params_template.cntk | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 contrib/Python/cntk/templates/cntk_global_params_template.cntk diff --git a/contrib/Python/cntk/templates/cntk_global_params_template.cntk b/contrib/Python/cntk/templates/cntk_global_params_template.cntk deleted file mode 100644 index e2662dc03..000000000 --- a/contrib/Python/cntk/templates/cntk_global_params_template.cntk +++ /dev/null @@ -1,4 +0,0 @@ -modelPath="%(ModelPath)s" -deviceId=%(DevideId)s -precision="%(Precision)s" - From 94e811504d047ac4f4e42a8bb5760b0dd86213e7 Mon Sep 17 00:00:00 2001 From: jeanfad Date: Thu, 12 May 2016 11:30:24 +0200 Subject: [PATCH 20/51] address CR comments --- contrib/Python/cntk/context.py | 4 +-- contrib/Python/cntk/sgd.py | 63 +++++++++++++++++----------------- 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/contrib/Python/cntk/context.py b/contrib/Python/cntk/context.py index e58e4a0d4..e3e637a8f 100644 --- a/contrib/Python/cntk/context.py +++ b/contrib/Python/cntk/context.py @@ -685,7 +685,7 @@ class LocalExecutionContext(AbstractContext): ''' if root_nodes is None and input_map is None: - raise ValueError('If input_map is None, you have to specify root_nodes.') + raise ValueError('if input_map is None, you have to specify root_nodes.') action_name = "Test" config_content = self._generate_test_config(root_nodes, input_map, @@ -832,7 +832,7 @@ class DeferredExecutionContext(AbstractContext): input_map (:class:`cntk.reader.InputMap`): describes how to map inputs to the data in a data file using a reader ''' if root_nodes is None and input_map is None: - raise ValueError('If input_map is None, you have to specify root_nodes.') + raise ValueError('if input_map is None, you have to specify root_nodes.') action_name = "Test" config_content = self._generate_test_config(root_nodes, input_map, action_name) diff --git a/contrib/Python/cntk/sgd.py b/contrib/Python/cntk/sgd.py index 956c1222d..8960f02f9 100644 --- 
a/contrib/Python/cntk/sgd.py +++ b/contrib/Python/cntk/sgd.py @@ -287,12 +287,11 @@ class SGDParams: This function sets the parallel training to Data Paralllel SGD. Args: - parallelization_start_epoch: accepts integer value; default is 1 - distributed_mb_reading: accepts boolean value: True or False ; - default is False It is recommended to turn distributed minibatch - reading on to minimize the I/O cost in each worker. - sync_perf_stats: accepts integer value; default is 0 - gradient_bits: the number of bits used to send gradient updates + parallelization_start_epoch (int): accepts integer value; default is 1 + distributed_mb_reading (bool): default is False It is recommended to + turn distributed minibatch reading on to minimize the I/O cost in each worker. + sync_perf_stats (int): accepts integer value; default is 0 + gradient_bits (int): the number of bits used to send gradient updates use_zero_threshold_for_1bit: TBA use_buffered_async_gradient_aggregation: TBA """ @@ -316,13 +315,13 @@ class SGDParams: This function sets the parallel training to Model Averaging SGD. Args: - parallelization_start_epoch : accepts integer value; default is 1 - distributed_mb_reading : accepts boolean value: True or False ; - default is False It is recommended to turn distributed minibatch - reading on to minimize the I/O cost in each worker. - sync_perf_stats: accepts integer value; default is 0 - sync_period: specifies the number of samples that each worker need - to process before a model averaging is conducted. The default value is 40,000. + parallelization_start_epoch (int): accepts integer value; default is 1 + distributed_mb_reading (int): accepts boolean value: True or False ; + default is False It is recommended to turn distributed minibatch + reading on to minimize the I/O cost in each worker. + sync_perf_stats (int): accepts integer value; default is 0 + sync_period (int): specifies the number of samples that each worker need + to process before a model averaging is conducted. The default value is 40,000. sync_frequency_in_frames: TBA """ self._set_global_parallel_params('ModelAveragingSGD', @@ -348,25 +347,25 @@ class SGDParams: This function sets the parallel training to Block Momentum SGD. Args: - parallelization_start_epoch : accepts integer value; default is 1 - distributed_mb_reading : accepts boolean value: True or False ; - default is False It is recommended to turn distributed minibatch - reading on to minimize the I/O cost in each worker. - sync_perf_stats: accepts integer value; default is 0 + parallelization_start_epoch (int): accepts integer value; default is 1 + distributed_mb_reading (bool): accepts boolean value: True or False ; + default is False It is recommended to turn distributed minibatch + reading on to minimize the I/O cost in each worker. + sync_perf_stats (int): accepts integer value; default is 0 sync_period: it specifies how frequent a model synchronization is performed. - The default value is 120,000. - reset_sgd_momentum: This means after every synchronization point, - the smoothed gradient used in local SGD will be set as 0. The default - value of this variable is True. - use_nesterov_momentum: This means the Nestrov style block momentum - is applied. The default value of this variable is True. - block_learning_rate: specifies the block learning rate. + The default value is 120,000. + reset_sgd_momentum (bool): This means after every synchronization point, + the smoothed gradient used in local SGD will be set as 0. The default + value of this variable is True. 
+ use_nesterov_momentum (bool): This means the Nestrov style block momentum + is applied. The default value of this variable is True. + block_learning_rate (float): specifies the block learning rate. block_momentum_per_sync: TBA - block_momentum_as_time_constant: specifies the time constant of the - low-pass filter in block-level model update. It is calculated as: - blockMomentumAsTimeConstant = -syncPeriod / log(block_momentum). - Note that block_momentum_per_sync and block_momentum_as_time_constant - are mutually exclusive + block_momentum_as_time_constant (float): specifies the time constant of the + low-pass filter in block-level model update. It is calculated as: + blockMomentumAsTimeConstant = -syncPeriod / log(block_momentum). + Note that block_momentum_per_sync and block_momentum_as_time_constant + are mutually exclusive """ self._set_global_parallel_params('BlockMomentumSGD', @@ -386,12 +385,12 @@ class SGDParams: def _generate_parallel_training_config(self): config = ['ParallelTrain=['] for k,v in self.parallel_training.items(): - if v: + if v is not None: config.append('\t{0} = {1}'.format(k, v)) config.append('\t{0} = ['.format(self.parallel_training['parallelizationMethod'])) for k,v in self.parallel_training_subblock.items(): - if v: + if v is not None: config.append('\t\t{0} = {1}'.format(k, v)) config.append('\t]') config.append(']') From a99c334c1cf7756d176cadf912d4cb9202143f0a Mon Sep 17 00:00:00 2001 From: jeanfad Date: Thu, 12 May 2016 11:30:48 +0200 Subject: [PATCH 21/51] guard against faulty input data --- contrib/Python/cntk/ops/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/contrib/Python/cntk/ops/__init__.py b/contrib/Python/cntk/ops/__init__.py index cd1b22846..ae2489d48 100644 --- a/contrib/Python/cntk/ops/__init__.py +++ b/contrib/Python/cntk/ops/__init__.py @@ -635,6 +635,9 @@ def input_numpy(value, alias=None, dynamic_axis='', name=None): else: cntk_shape = value[0].shape + if len(cntk_shape) == 0: + raise ValueError('value should be an array of input samples') + node = input(cntk_shape, dynamic_axis=dynamic_axis) from ..reader import LazyInputReader node.reader = LazyInputReader( From 5f2cfa020aad23511915751df2f164fdd897b591 Mon Sep 17 00:00:00 2001 From: Mark Hillebrand Date: Thu, 12 May 2016 12:48:45 +0200 Subject: [PATCH 22/51] msra::asr::htkfeatreader::parsedpath: accept trailing carriage return --- Source/Readers/ExperimentalHTKMLFReader/ConfigHelper.cpp | 2 +- Source/Readers/HTKMLFReader/htkfeatio.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Source/Readers/ExperimentalHTKMLFReader/ConfigHelper.cpp b/Source/Readers/ExperimentalHTKMLFReader/ConfigHelper.cpp index 9b376e184..7daa1c67c 100644 --- a/Source/Readers/ExperimentalHTKMLFReader/ConfigHelper.cpp +++ b/Source/Readers/ExperimentalHTKMLFReader/ConfigHelper.cpp @@ -231,7 +231,7 @@ vector ConfigHelper::GetSequencePaths() // post processing file list : // - if users specified PrefixPath, add the prefix to each of path in filelist // - else do the dotdotdot expansion if necessary - if (!rootPath.empty()) // use has specified a path prefix for this feature + if (!rootPath.empty()) // user has specified a path prefix for this feature { // first make slash consistent (sorry for Linux users:this is not necessary for you) replace(rootPath.begin(), rootPath.end(), L'\\', L'/'); diff --git a/Source/Readers/HTKMLFReader/htkfeatio.h b/Source/Readers/HTKMLFReader/htkfeatio.h index 1b9129c39..cc9656145 100644 --- a/Source/Readers/HTKMLFReader/htkfeatio.h +++ 
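[Editorial aside -- the standalone sketch below is not part of the patch series. It mirrors the _generate_parallel_training_config() logic introduced in PATCH 14/15 and corrected in PATCH 18/20, so the shape of the emitted ParallelTrain block can be inspected without the cntk Python package. The dictionary keys follow the patches; every concrete value is a hypothetical example.]

# Illustrative only: re-implementation sketch of the ParallelTrain config generation.
parallel_training = {
    'parallelizationMethod': 'DataParallelSGD',
    'parallelizationStartEpoch': 1,
    'distributedMBReading': 'true',
    'syncPerfStats': None,              # unset entries are skipped
}
parallel_training_subblock = {
    'gradientBits': 1,
    'useZeroThresholdFor1Bit': 'true',
    'useBufferedAsyncGradientAggregation': None,
}

config = ['ParallelTrain=[']
for k, v in parallel_training.items():
    if v is not None:                   # 'is not None' test, as fixed in PATCH 20
        config.append('\t{0} = {1}'.format(k, v))
config.append('\t{0} = ['.format(parallel_training['parallelizationMethod']))
for k, v in parallel_training_subblock.items():
    if v is not None:
        config.append('\t\t{0} = {1}'.format(k, v))
config.append('\t]')
config.append(']')
print('\n'.join(config))
# Expected output (tabs shown as indentation):
# ParallelTrain=[
#     parallelizationMethod = DataParallelSGD
#     parallelizationStartEpoch = 1
#     distributedMBReading = true
#     DataParallelSGD = [
#         gradientBits = 1
#         useZeroThresholdFor1Bit = true
#     ]
# ]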
b/Source/Readers/HTKMLFReader/htkfeatio.h @@ -417,7 +417,8 @@ public: if (xpath.empty()) malformed(pathParam); e = msra::strfun::toint(consume(xpath, L"]")); - if (!xpath.empty()) + // TODO \r should be handled elsewhere; refine this + if (!xpath.empty() && xpath != L"\r") malformed(pathParam); isarchive = true; } From 3600ed6fb6406ebf1ef310b5bd9e2f846ac0881f Mon Sep 17 00:00:00 2001 From: Alexey Reznichenko Date: Thu, 12 May 2016 10:16:36 +0200 Subject: [PATCH 23/51] Add missing #define __STDC_FORMAT_MACROS Close #464 --- Source/Readers/CNTKTextFormatReader/TextConfigHelper.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Readers/CNTKTextFormatReader/TextConfigHelper.cpp b/Source/Readers/CNTKTextFormatReader/TextConfigHelper.cpp index c998e94f1..de04a03a3 100644 --- a/Source/Readers/CNTKTextFormatReader/TextConfigHelper.cpp +++ b/Source/Readers/CNTKTextFormatReader/TextConfigHelper.cpp @@ -4,6 +4,7 @@ // #include "stdafx.h" +#define __STDC_FORMAT_MACROS #include #include #include "TextConfigHelper.h" From 7a83e7eda340b5b8b32e7b7ceb0a335404d48149 Mon Sep 17 00:00:00 2001 From: Mark Hillebrand Date: Mon, 9 May 2016 16:30:50 +0200 Subject: [PATCH 24/51] Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/: re-instate test for identical baselines (Reverts parts of 04abf66d78420c410da9bf9e4ba82d88223f95b1) --- .../baseline.cpu.txt | 432 +-- .../baseline.gpu.txt | 952 +++--- .../baseline.windows.cpu.txt | 432 +-- .../baseline.windows.gpu.txt | 920 +++--- .../DNN/DiscriminativePreTraining/run-test | 6 + .../Parallel1BitQuantization/baseline.cpu.txt | 966 +++--- .../Parallel1BitQuantization/baseline.gpu.txt | 968 +++--- .../baseline.windows.cpu.txt | 939 +++--- .../baseline.windows.gpu.txt | 934 +++--- .../DNN/Parallel1BitQuantization/run-test | 6 + .../baseline.cpu.txt | 1297 ++++---- .../baseline.gpu.txt | 1299 ++++---- .../baseline.windows.cpu.txt | 1215 ++++---- .../baseline.windows.gpu.txt | 1259 ++++---- .../run-test | 6 + .../ParallelNoQuantization/baseline.cpu.txt | 976 +++--- .../ParallelNoQuantization/baseline.gpu.txt | 976 +++--- .../baseline.windows.cpu.txt | 934 +++--- .../baseline.windows.gpu.txt | 932 +++--- .../DNN/ParallelNoQuantization/run-test | 6 + .../baseline.cpu.txt | 2639 ++++++++++------ .../baseline.gpu.txt | 2664 +++++++++++------ .../baseline.windows.cpu.txt | 1233 ++++---- .../baseline.windows.gpu.txt | 1280 ++++---- .../run-test | 6 + .../LSTM/Truncated/run-test | 2 +- .../QuickE2E/baseline.cpu.txt | 513 ++-- .../QuickE2E/baseline.gpu.txt | 513 ++-- .../QuickE2E/baseline.windows.cpu.txt | 497 +-- .../QuickE2E/baseline.windows.gpu.txt | 497 +-- .../QuickE2E/run-test | 6 + .../SVD/baseline.cpu.txt | 482 +-- .../SVD/baseline.gpu.txt | 482 +-- .../SVD/baseline.windows.cpu.txt | 470 +-- .../SVD/baseline.windows.gpu.txt | 472 +-- .../ExperimentalHtkmlfReader/SVD/run-test | 6 + 36 files changed, 14351 insertions(+), 12866 deletions(-) diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.cpu.txt index a576afc19..036d0841a 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.cpu.txt @@ -630,76 +630,76 @@ minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 
10.0 ms Starting minibatch loop. - Epoch[ 1 of 2]-Minibatch[ 1- 10 of 320]: * 2560; CrossEntropyWithSoftmax = 3.74305420; EvalErrorPrediction = 0.81132812; TotalTime = 1.61207s; TotalTimePerSample = 0.62972ms; SamplesPerSecond = 1588 - Epoch[ 1 of 2]-Minibatch[ 11- 20 of 320]: * 2560; CrossEntropyWithSoftmax = 2.94799194; EvalErrorPrediction = 0.72539062; TotalTime = 1.71218s; TotalTimePerSample = 0.66882ms; SamplesPerSecond = 1495 - Epoch[ 1 of 2]-Minibatch[ 21- 30 of 320]: * 2560; CrossEntropyWithSoftmax = 2.62617187; EvalErrorPrediction = 0.68046875; TotalTime = 1.96081s; TotalTimePerSample = 0.76594ms; SamplesPerSecond = 1305 - Epoch[ 1 of 2]-Minibatch[ 31- 40 of 320]: * 2560; CrossEntropyWithSoftmax = 2.31340332; EvalErrorPrediction = 0.60000000; TotalTime = 1.98096s; TotalTimePerSample = 0.77381ms; SamplesPerSecond = 1292 - Epoch[ 1 of 2]-Minibatch[ 41- 50 of 320]: * 2560; CrossEntropyWithSoftmax = 2.08197784; EvalErrorPrediction = 0.56367188; TotalTime = 2.37392s; TotalTimePerSample = 0.92731ms; SamplesPerSecond = 1078 - Epoch[ 1 of 2]-Minibatch[ 51- 60 of 320]: * 2560; CrossEntropyWithSoftmax = 1.93575439; EvalErrorPrediction = 0.53437500; TotalTime = 2.27887s; TotalTimePerSample = 0.89018ms; SamplesPerSecond = 1123 - Epoch[ 1 of 2]-Minibatch[ 61- 70 of 320]: * 2560; CrossEntropyWithSoftmax = 1.83957062; EvalErrorPrediction = 0.51796875; TotalTime = 2.12047s; TotalTimePerSample = 0.82831ms; SamplesPerSecond = 1207 - Epoch[ 1 of 2]-Minibatch[ 71- 80 of 320]: * 2560; CrossEntropyWithSoftmax = 1.70679779; EvalErrorPrediction = 0.48750000; TotalTime = 1.99332s; TotalTimePerSample = 0.77864ms; SamplesPerSecond = 1284 - Epoch[ 1 of 2]-Minibatch[ 81- 90 of 320]: * 2560; CrossEntropyWithSoftmax = 1.59371796; EvalErrorPrediction = 0.45703125; TotalTime = 2.26050s; TotalTimePerSample = 0.88301ms; SamplesPerSecond = 1132 - Epoch[ 1 of 2]-Minibatch[ 91- 100 of 320]: * 2560; CrossEntropyWithSoftmax = 1.64279327; EvalErrorPrediction = 0.48593750; TotalTime = 2.19745s; TotalTimePerSample = 0.85838ms; SamplesPerSecond = 1164 - Epoch[ 1 of 2]-Minibatch[ 101- 110 of 320]: * 2560; CrossEntropyWithSoftmax = 1.60591888; EvalErrorPrediction = 0.47578125; TotalTime = 2.17477s; TotalTimePerSample = 0.84952ms; SamplesPerSecond = 1177 - Epoch[ 1 of 2]-Minibatch[ 111- 120 of 320]: * 2560; CrossEntropyWithSoftmax = 1.52358704; EvalErrorPrediction = 0.44882813; TotalTime = 2.13886s; TotalTimePerSample = 0.83549ms; SamplesPerSecond = 1196 - Epoch[ 1 of 2]-Minibatch[ 121- 130 of 320]: * 2560; CrossEntropyWithSoftmax = 1.49677734; EvalErrorPrediction = 0.44609375; TotalTime = 2.13765s; TotalTimePerSample = 0.83502ms; SamplesPerSecond = 1197 - Epoch[ 1 of 2]-Minibatch[ 131- 140 of 320]: * 2560; CrossEntropyWithSoftmax = 1.54127502; EvalErrorPrediction = 0.45273438; TotalTime = 2.20730s; TotalTimePerSample = 0.86222ms; SamplesPerSecond = 1159 - Epoch[ 1 of 2]-Minibatch[ 141- 150 of 320]: * 2560; CrossEntropyWithSoftmax = 1.43566284; EvalErrorPrediction = 0.41406250; TotalTime = 2.14483s; TotalTimePerSample = 0.83782ms; SamplesPerSecond = 1193 - Epoch[ 1 of 2]-Minibatch[ 151- 160 of 320]: * 2560; CrossEntropyWithSoftmax = 1.41996460; EvalErrorPrediction = 0.40976563; TotalTime = 2.27204s; TotalTimePerSample = 0.88751ms; SamplesPerSecond = 1126 - Epoch[ 1 of 2]-Minibatch[ 161- 170 of 320]: * 2560; CrossEntropyWithSoftmax = 1.38546448; EvalErrorPrediction = 0.40976563; TotalTime = 2.37860s; TotalTimePerSample = 0.92914ms; SamplesPerSecond = 1076 - Epoch[ 1 of 2]-Minibatch[ 171- 180 of 320]: * 2560; 
CrossEntropyWithSoftmax = 1.40654907; EvalErrorPrediction = 0.42109375; TotalTime = 2.21343s; TotalTimePerSample = 0.86462ms; SamplesPerSecond = 1156 - Epoch[ 1 of 2]-Minibatch[ 181- 190 of 320]: * 2560; CrossEntropyWithSoftmax = 1.41048584; EvalErrorPrediction = 0.41445312; TotalTime = 2.20519s; TotalTimePerSample = 0.86140ms; SamplesPerSecond = 1160 - Epoch[ 1 of 2]-Minibatch[ 191- 200 of 320]: * 2560; CrossEntropyWithSoftmax = 1.43002319; EvalErrorPrediction = 0.42500000; TotalTime = 2.10237s; TotalTimePerSample = 0.82124ms; SamplesPerSecond = 1217 - Epoch[ 1 of 2]-Minibatch[ 201- 210 of 320]: * 2560; CrossEntropyWithSoftmax = 1.41000977; EvalErrorPrediction = 0.42382812; TotalTime = 2.31127s; TotalTimePerSample = 0.90284ms; SamplesPerSecond = 1107 - Epoch[ 1 of 2]-Minibatch[ 211- 220 of 320]: * 2560; CrossEntropyWithSoftmax = 1.33088989; EvalErrorPrediction = 0.40468750; TotalTime = 2.30367s; TotalTimePerSample = 0.89987ms; SamplesPerSecond = 1111 - Epoch[ 1 of 2]-Minibatch[ 221- 230 of 320]: * 2560; CrossEntropyWithSoftmax = 1.28450928; EvalErrorPrediction = 0.38515625; TotalTime = 2.22202s; TotalTimePerSample = 0.86798ms; SamplesPerSecond = 1152 - Epoch[ 1 of 2]-Minibatch[ 231- 240 of 320]: * 2560; CrossEntropyWithSoftmax = 1.34680481; EvalErrorPrediction = 0.40664062; TotalTime = 2.28282s; TotalTimePerSample = 0.89173ms; SamplesPerSecond = 1121 - Epoch[ 1 of 2]-Minibatch[ 241- 250 of 320]: * 2560; CrossEntropyWithSoftmax = 1.32020569; EvalErrorPrediction = 0.39140625; TotalTime = 2.06229s; TotalTimePerSample = 0.80558ms; SamplesPerSecond = 1241 - Epoch[ 1 of 2]-Minibatch[ 251- 260 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22445068; EvalErrorPrediction = 0.37031250; TotalTime = 2.10306s; TotalTimePerSample = 0.82151ms; SamplesPerSecond = 1217 - Epoch[ 1 of 2]-Minibatch[ 261- 270 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23881226; EvalErrorPrediction = 0.37109375; TotalTime = 2.26618s; TotalTimePerSample = 0.88523ms; SamplesPerSecond = 1129 - Epoch[ 1 of 2]-Minibatch[ 271- 280 of 320]: * 2560; CrossEntropyWithSoftmax = 1.30255432; EvalErrorPrediction = 0.38984375; TotalTime = 2.17350s; TotalTimePerSample = 0.84902ms; SamplesPerSecond = 1177 - Epoch[ 1 of 2]-Minibatch[ 281- 290 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22100830; EvalErrorPrediction = 0.36992188; TotalTime = 1.77139s; TotalTimePerSample = 0.69195ms; SamplesPerSecond = 1445 - Epoch[ 1 of 2]-Minibatch[ 291- 300 of 320]: * 2560; CrossEntropyWithSoftmax = 1.21442871; EvalErrorPrediction = 0.36445312; TotalTime = 2.28764s; TotalTimePerSample = 0.89361ms; SamplesPerSecond = 1119 - Epoch[ 1 of 2]-Minibatch[ 301- 310 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23150024; EvalErrorPrediction = 0.37578125; TotalTime = 2.29486s; TotalTimePerSample = 0.89643ms; SamplesPerSecond = 1115 - Epoch[ 1 of 2]-Minibatch[ 311- 320 of 320]: * 2560; CrossEntropyWithSoftmax = 1.25499268; EvalErrorPrediction = 0.37656250; TotalTime = 2.14378s; TotalTimePerSample = 0.83741ms; SamplesPerSecond = 1194 -Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.6395972; EvalErrorPrediction = 0.46284181; learningRatePerSample = 0.003125000047; EpochTime=69.649363 + Epoch[ 1 of 2]-Minibatch[ 1- 10 of 320]: * 2560; ce = 3.74305420; err = 0.81132812; TotalTime = 1.61207s; TotalTimePerSample = 0.62972ms; SamplesPerSecond = 1588 + Epoch[ 1 of 2]-Minibatch[ 11- 20 of 320]: * 2560; ce = 2.94799194; err = 0.72539062; TotalTime = 1.71218s; TotalTimePerSample = 0.66882ms; SamplesPerSecond = 1495 + Epoch[ 1 of 2]-Minibatch[ 21- 30 of 320]: * 2560; 
ce = 2.62617187; err = 0.68046875; TotalTime = 1.96081s; TotalTimePerSample = 0.76594ms; SamplesPerSecond = 1305 + Epoch[ 1 of 2]-Minibatch[ 31- 40 of 320]: * 2560; ce = 2.31340332; err = 0.60000000; TotalTime = 1.98096s; TotalTimePerSample = 0.77381ms; SamplesPerSecond = 1292 + Epoch[ 1 of 2]-Minibatch[ 41- 50 of 320]: * 2560; ce = 2.08197784; err = 0.56367188; TotalTime = 2.37392s; TotalTimePerSample = 0.92731ms; SamplesPerSecond = 1078 + Epoch[ 1 of 2]-Minibatch[ 51- 60 of 320]: * 2560; ce = 1.93575439; err = 0.53437500; TotalTime = 2.27887s; TotalTimePerSample = 0.89018ms; SamplesPerSecond = 1123 + Epoch[ 1 of 2]-Minibatch[ 61- 70 of 320]: * 2560; ce = 1.83957062; err = 0.51796875; TotalTime = 2.12047s; TotalTimePerSample = 0.82831ms; SamplesPerSecond = 1207 + Epoch[ 1 of 2]-Minibatch[ 71- 80 of 320]: * 2560; ce = 1.70679779; err = 0.48750000; TotalTime = 1.99332s; TotalTimePerSample = 0.77864ms; SamplesPerSecond = 1284 + Epoch[ 1 of 2]-Minibatch[ 81- 90 of 320]: * 2560; ce = 1.59371796; err = 0.45703125; TotalTime = 2.26050s; TotalTimePerSample = 0.88301ms; SamplesPerSecond = 1132 + Epoch[ 1 of 2]-Minibatch[ 91- 100 of 320]: * 2560; ce = 1.64279327; err = 0.48593750; TotalTime = 2.19745s; TotalTimePerSample = 0.85838ms; SamplesPerSecond = 1164 + Epoch[ 1 of 2]-Minibatch[ 101- 110 of 320]: * 2560; ce = 1.60591888; err = 0.47578125; TotalTime = 2.17477s; TotalTimePerSample = 0.84952ms; SamplesPerSecond = 1177 + Epoch[ 1 of 2]-Minibatch[ 111- 120 of 320]: * 2560; ce = 1.52358704; err = 0.44882813; TotalTime = 2.13886s; TotalTimePerSample = 0.83549ms; SamplesPerSecond = 1196 + Epoch[ 1 of 2]-Minibatch[ 121- 130 of 320]: * 2560; ce = 1.49677734; err = 0.44609375; TotalTime = 2.13765s; TotalTimePerSample = 0.83502ms; SamplesPerSecond = 1197 + Epoch[ 1 of 2]-Minibatch[ 131- 140 of 320]: * 2560; ce = 1.54127502; err = 0.45273438; TotalTime = 2.20730s; TotalTimePerSample = 0.86222ms; SamplesPerSecond = 1159 + Epoch[ 1 of 2]-Minibatch[ 141- 150 of 320]: * 2560; ce = 1.43566284; err = 0.41406250; TotalTime = 2.14483s; TotalTimePerSample = 0.83782ms; SamplesPerSecond = 1193 + Epoch[ 1 of 2]-Minibatch[ 151- 160 of 320]: * 2560; ce = 1.41996460; err = 0.40976563; TotalTime = 2.27204s; TotalTimePerSample = 0.88751ms; SamplesPerSecond = 1126 + Epoch[ 1 of 2]-Minibatch[ 161- 170 of 320]: * 2560; ce = 1.38546448; err = 0.40976563; TotalTime = 2.37860s; TotalTimePerSample = 0.92914ms; SamplesPerSecond = 1076 + Epoch[ 1 of 2]-Minibatch[ 171- 180 of 320]: * 2560; ce = 1.40654907; err = 0.42109375; TotalTime = 2.21343s; TotalTimePerSample = 0.86462ms; SamplesPerSecond = 1156 + Epoch[ 1 of 2]-Minibatch[ 181- 190 of 320]: * 2560; ce = 1.41048584; err = 0.41445312; TotalTime = 2.20519s; TotalTimePerSample = 0.86140ms; SamplesPerSecond = 1160 + Epoch[ 1 of 2]-Minibatch[ 191- 200 of 320]: * 2560; ce = 1.43002319; err = 0.42500000; TotalTime = 2.10237s; TotalTimePerSample = 0.82124ms; SamplesPerSecond = 1217 + Epoch[ 1 of 2]-Minibatch[ 201- 210 of 320]: * 2560; ce = 1.41000977; err = 0.42382812; TotalTime = 2.31127s; TotalTimePerSample = 0.90284ms; SamplesPerSecond = 1107 + Epoch[ 1 of 2]-Minibatch[ 211- 220 of 320]: * 2560; ce = 1.33088989; err = 0.40468750; TotalTime = 2.30367s; TotalTimePerSample = 0.89987ms; SamplesPerSecond = 1111 + Epoch[ 1 of 2]-Minibatch[ 221- 230 of 320]: * 2560; ce = 1.28450928; err = 0.38515625; TotalTime = 2.22202s; TotalTimePerSample = 0.86798ms; SamplesPerSecond = 1152 + Epoch[ 1 of 2]-Minibatch[ 231- 240 of 320]: * 2560; ce = 1.34680481; err = 0.40664062; TotalTime = 2.28282s; 
TotalTimePerSample = 0.89173ms; SamplesPerSecond = 1121 + Epoch[ 1 of 2]-Minibatch[ 241- 250 of 320]: * 2560; ce = 1.32020569; err = 0.39140625; TotalTime = 2.06229s; TotalTimePerSample = 0.80558ms; SamplesPerSecond = 1241 + Epoch[ 1 of 2]-Minibatch[ 251- 260 of 320]: * 2560; ce = 1.22445068; err = 0.37031250; TotalTime = 2.10306s; TotalTimePerSample = 0.82151ms; SamplesPerSecond = 1217 + Epoch[ 1 of 2]-Minibatch[ 261- 270 of 320]: * 2560; ce = 1.23881226; err = 0.37109375; TotalTime = 2.26618s; TotalTimePerSample = 0.88523ms; SamplesPerSecond = 1129 + Epoch[ 1 of 2]-Minibatch[ 271- 280 of 320]: * 2560; ce = 1.30255432; err = 0.38984375; TotalTime = 2.17350s; TotalTimePerSample = 0.84902ms; SamplesPerSecond = 1177 + Epoch[ 1 of 2]-Minibatch[ 281- 290 of 320]: * 2560; ce = 1.22100830; err = 0.36992188; TotalTime = 1.77139s; TotalTimePerSample = 0.69195ms; SamplesPerSecond = 1445 + Epoch[ 1 of 2]-Minibatch[ 291- 300 of 320]: * 2560; ce = 1.21442871; err = 0.36445312; TotalTime = 2.28764s; TotalTimePerSample = 0.89361ms; SamplesPerSecond = 1119 + Epoch[ 1 of 2]-Minibatch[ 301- 310 of 320]: * 2560; ce = 1.23150024; err = 0.37578125; TotalTime = 2.29486s; TotalTimePerSample = 0.89643ms; SamplesPerSecond = 1115 + Epoch[ 1 of 2]-Minibatch[ 311- 320 of 320]: * 2560; ce = 1.25499268; err = 0.37656250; TotalTime = 2.14378s; TotalTimePerSample = 0.83741ms; SamplesPerSecond = 1194 +Finished Epoch[ 1 of 2]: [Training] ce = 1.6395972; err = 0.46284181; learningRatePerSample = 0.003125000047; EpochTime=69.649363 Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses Starting minibatch loop. - Epoch[ 2 of 2]-Minibatch[ 1- 10 of 320]: * 2560; CrossEntropyWithSoftmax = 1.24432030; EvalErrorPrediction = 0.38437500; TotalTime = 2.17401s; TotalTimePerSample = 0.84922ms; SamplesPerSecond = 1177 - Epoch[ 2 of 2]-Minibatch[ 11- 20 of 320]: * 2560; CrossEntropyWithSoftmax = 1.21878033; EvalErrorPrediction = 0.37500000; TotalTime = 2.22384s; TotalTimePerSample = 0.86869ms; SamplesPerSecond = 1151 - Epoch[ 2 of 2]-Minibatch[ 21- 30 of 320]: * 2560; CrossEntropyWithSoftmax = 1.28410358; EvalErrorPrediction = 0.37812500; TotalTime = 2.19445s; TotalTimePerSample = 0.85721ms; SamplesPerSecond = 1166 - Epoch[ 2 of 2]-Minibatch[ 31- 40 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22131767; EvalErrorPrediction = 0.37382813; TotalTime = 2.02087s; TotalTimePerSample = 0.78940ms; SamplesPerSecond = 1266 - Epoch[ 2 of 2]-Minibatch[ 41- 50 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17638283; EvalErrorPrediction = 0.35273437; TotalTime = 1.98391s; TotalTimePerSample = 0.77497ms; SamplesPerSecond = 1290 - Epoch[ 2 of 2]-Minibatch[ 51- 60 of 320]: * 2560; CrossEntropyWithSoftmax = 1.28770714; EvalErrorPrediction = 0.39218750; TotalTime = 2.10243s; TotalTimePerSample = 0.82126ms; SamplesPerSecond = 1217 - Epoch[ 2 of 2]-Minibatch[ 61- 70 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22729797; EvalErrorPrediction = 0.37421875; TotalTime = 2.10615s; TotalTimePerSample = 0.82271ms; SamplesPerSecond = 1215 - Epoch[ 2 of 2]-Minibatch[ 71- 80 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17497940; EvalErrorPrediction = 0.36953125; TotalTime = 2.13498s; TotalTimePerSample = 0.83398ms; SamplesPerSecond = 1199 - Epoch[ 2 of 2]-Minibatch[ 81- 90 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23583298; EvalErrorPrediction = 0.35742188; TotalTime = 2.17273s; TotalTimePerSample = 0.84872ms; 
SamplesPerSecond = 1178 - Epoch[ 2 of 2]-Minibatch[ 91- 100 of 320]: * 2560; CrossEntropyWithSoftmax = 1.16937485; EvalErrorPrediction = 0.37187500; TotalTime = 1.96073s; TotalTimePerSample = 0.76591ms; SamplesPerSecond = 1305 - Epoch[ 2 of 2]-Minibatch[ 101- 110 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18656921; EvalErrorPrediction = 0.34765625; TotalTime = 2.07057s; TotalTimePerSample = 0.80882ms; SamplesPerSecond = 1236 - Epoch[ 2 of 2]-Minibatch[ 111- 120 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18989105; EvalErrorPrediction = 0.35781250; TotalTime = 1.97441s; TotalTimePerSample = 0.77125ms; SamplesPerSecond = 1296 - Epoch[ 2 of 2]-Minibatch[ 121- 130 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17073975; EvalErrorPrediction = 0.36445312; TotalTime = 2.17177s; TotalTimePerSample = 0.84835ms; SamplesPerSecond = 1178 - Epoch[ 2 of 2]-Minibatch[ 131- 140 of 320]: * 2560; CrossEntropyWithSoftmax = 1.13176422; EvalErrorPrediction = 0.34375000; TotalTime = 2.20069s; TotalTimePerSample = 0.85964ms; SamplesPerSecond = 1163 - Epoch[ 2 of 2]-Minibatch[ 141- 150 of 320]: * 2560; CrossEntropyWithSoftmax = 1.08576660; EvalErrorPrediction = 0.32421875; TotalTime = 2.25159s; TotalTimePerSample = 0.87953ms; SamplesPerSecond = 1136 - Epoch[ 2 of 2]-Minibatch[ 151- 160 of 320]: * 2560; CrossEntropyWithSoftmax = 1.11148376; EvalErrorPrediction = 0.33867188; TotalTime = 2.25098s; TotalTimePerSample = 0.87929ms; SamplesPerSecond = 1137 - Epoch[ 2 of 2]-Minibatch[ 161- 170 of 320]: * 2560; CrossEntropyWithSoftmax = 1.20480194; EvalErrorPrediction = 0.36250000; TotalTime = 2.09545s; TotalTimePerSample = 0.81854ms; SamplesPerSecond = 1221 - Epoch[ 2 of 2]-Minibatch[ 171- 180 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17241821; EvalErrorPrediction = 0.35820313; TotalTime = 2.07758s; TotalTimePerSample = 0.81155ms; SamplesPerSecond = 1232 - Epoch[ 2 of 2]-Minibatch[ 181- 190 of 320]: * 2560; CrossEntropyWithSoftmax = 1.13457642; EvalErrorPrediction = 0.35429688; TotalTime = 2.10773s; TotalTimePerSample = 0.82333ms; SamplesPerSecond = 1214 - Epoch[ 2 of 2]-Minibatch[ 191- 200 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12700500; EvalErrorPrediction = 0.35234375; TotalTime = 2.33236s; TotalTimePerSample = 0.91108ms; SamplesPerSecond = 1097 - Epoch[ 2 of 2]-Minibatch[ 201- 210 of 320]: * 2560; CrossEntropyWithSoftmax = 1.11282043; EvalErrorPrediction = 0.33515625; TotalTime = 2.11595s; TotalTimePerSample = 0.82654ms; SamplesPerSecond = 1209 - Epoch[ 2 of 2]-Minibatch[ 211- 220 of 320]: * 2560; CrossEntropyWithSoftmax = 1.13374786; EvalErrorPrediction = 0.34296875; TotalTime = 2.14508s; TotalTimePerSample = 0.83792ms; SamplesPerSecond = 1193 - Epoch[ 2 of 2]-Minibatch[ 221- 230 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14316711; EvalErrorPrediction = 0.35312500; TotalTime = 2.15834s; TotalTimePerSample = 0.84310ms; SamplesPerSecond = 1186 - Epoch[ 2 of 2]-Minibatch[ 231- 240 of 320]: * 2560; CrossEntropyWithSoftmax = 1.27329712; EvalErrorPrediction = 0.38554688; TotalTime = 2.26755s; TotalTimePerSample = 0.88576ms; SamplesPerSecond = 1128 - Epoch[ 2 of 2]-Minibatch[ 241- 250 of 320]: * 2560; CrossEntropyWithSoftmax = 1.15661011; EvalErrorPrediction = 0.34726563; TotalTime = 2.30714s; TotalTimePerSample = 0.90123ms; SamplesPerSecond = 1109 - Epoch[ 2 of 2]-Minibatch[ 251- 260 of 320]: * 2560; CrossEntropyWithSoftmax = 1.13043823; EvalErrorPrediction = 0.34101562; TotalTime = 2.06456s; TotalTimePerSample = 0.80647ms; SamplesPerSecond = 1239 - Epoch[ 2 of 2]-Minibatch[ 261- 270 of 320]: * 2560; 
CrossEntropyWithSoftmax = 1.13791809; EvalErrorPrediction = 0.34960938; TotalTime = 2.19548s; TotalTimePerSample = 0.85761ms; SamplesPerSecond = 1166 - Epoch[ 2 of 2]-Minibatch[ 271- 280 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14694214; EvalErrorPrediction = 0.34101562; TotalTime = 2.33209s; TotalTimePerSample = 0.91097ms; SamplesPerSecond = 1097 - Epoch[ 2 of 2]-Minibatch[ 281- 290 of 320]: * 2560; CrossEntropyWithSoftmax = 1.06658325; EvalErrorPrediction = 0.33476563; TotalTime = 2.20537s; TotalTimePerSample = 0.86147ms; SamplesPerSecond = 1160 - Epoch[ 2 of 2]-Minibatch[ 291- 300 of 320]: * 2560; CrossEntropyWithSoftmax = 1.10054626; EvalErrorPrediction = 0.33750000; TotalTime = 2.02138s; TotalTimePerSample = 0.78960ms; SamplesPerSecond = 1266 - Epoch[ 2 of 2]-Minibatch[ 301- 310 of 320]: * 2560; CrossEntropyWithSoftmax = 1.09098816; EvalErrorPrediction = 0.34375000; TotalTime = 2.19745s; TotalTimePerSample = 0.85838ms; SamplesPerSecond = 1164 - Epoch[ 2 of 2]-Minibatch[ 311- 320 of 320]: * 2560; CrossEntropyWithSoftmax = 1.05595398; EvalErrorPrediction = 0.33671875; TotalTime = 2.11208s; TotalTimePerSample = 0.82503ms; SamplesPerSecond = 1212 -Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.165754; EvalErrorPrediction = 0.35567626; learningRatePerSample = 0.003125000047; EpochTime=68.751932 + Epoch[ 2 of 2]-Minibatch[ 1- 10 of 320]: * 2560; ce = 1.24432030; err = 0.38437500; TotalTime = 2.17401s; TotalTimePerSample = 0.84922ms; SamplesPerSecond = 1177 + Epoch[ 2 of 2]-Minibatch[ 11- 20 of 320]: * 2560; ce = 1.21878033; err = 0.37500000; TotalTime = 2.22384s; TotalTimePerSample = 0.86869ms; SamplesPerSecond = 1151 + Epoch[ 2 of 2]-Minibatch[ 21- 30 of 320]: * 2560; ce = 1.28410358; err = 0.37812500; TotalTime = 2.19445s; TotalTimePerSample = 0.85721ms; SamplesPerSecond = 1166 + Epoch[ 2 of 2]-Minibatch[ 31- 40 of 320]: * 2560; ce = 1.22131767; err = 0.37382813; TotalTime = 2.02087s; TotalTimePerSample = 0.78940ms; SamplesPerSecond = 1266 + Epoch[ 2 of 2]-Minibatch[ 41- 50 of 320]: * 2560; ce = 1.17638283; err = 0.35273437; TotalTime = 1.98391s; TotalTimePerSample = 0.77497ms; SamplesPerSecond = 1290 + Epoch[ 2 of 2]-Minibatch[ 51- 60 of 320]: * 2560; ce = 1.28770714; err = 0.39218750; TotalTime = 2.10243s; TotalTimePerSample = 0.82126ms; SamplesPerSecond = 1217 + Epoch[ 2 of 2]-Minibatch[ 61- 70 of 320]: * 2560; ce = 1.22729797; err = 0.37421875; TotalTime = 2.10615s; TotalTimePerSample = 0.82271ms; SamplesPerSecond = 1215 + Epoch[ 2 of 2]-Minibatch[ 71- 80 of 320]: * 2560; ce = 1.17497940; err = 0.36953125; TotalTime = 2.13498s; TotalTimePerSample = 0.83398ms; SamplesPerSecond = 1199 + Epoch[ 2 of 2]-Minibatch[ 81- 90 of 320]: * 2560; ce = 1.23583298; err = 0.35742188; TotalTime = 2.17273s; TotalTimePerSample = 0.84872ms; SamplesPerSecond = 1178 + Epoch[ 2 of 2]-Minibatch[ 91- 100 of 320]: * 2560; ce = 1.16937485; err = 0.37187500; TotalTime = 1.96073s; TotalTimePerSample = 0.76591ms; SamplesPerSecond = 1305 + Epoch[ 2 of 2]-Minibatch[ 101- 110 of 320]: * 2560; ce = 1.18656921; err = 0.34765625; TotalTime = 2.07057s; TotalTimePerSample = 0.80882ms; SamplesPerSecond = 1236 + Epoch[ 2 of 2]-Minibatch[ 111- 120 of 320]: * 2560; ce = 1.18989105; err = 0.35781250; TotalTime = 1.97441s; TotalTimePerSample = 0.77125ms; SamplesPerSecond = 1296 + Epoch[ 2 of 2]-Minibatch[ 121- 130 of 320]: * 2560; ce = 1.17073975; err = 0.36445312; TotalTime = 2.17177s; TotalTimePerSample = 0.84835ms; SamplesPerSecond = 1178 + Epoch[ 2 of 2]-Minibatch[ 131- 140 of 320]: * 2560; ce = 
1.13176422; err = 0.34375000; TotalTime = 2.20069s; TotalTimePerSample = 0.85964ms; SamplesPerSecond = 1163 + Epoch[ 2 of 2]-Minibatch[ 141- 150 of 320]: * 2560; ce = 1.08576660; err = 0.32421875; TotalTime = 2.25159s; TotalTimePerSample = 0.87953ms; SamplesPerSecond = 1136 + Epoch[ 2 of 2]-Minibatch[ 151- 160 of 320]: * 2560; ce = 1.11148376; err = 0.33867188; TotalTime = 2.25098s; TotalTimePerSample = 0.87929ms; SamplesPerSecond = 1137 + Epoch[ 2 of 2]-Minibatch[ 161- 170 of 320]: * 2560; ce = 1.20480194; err = 0.36250000; TotalTime = 2.09545s; TotalTimePerSample = 0.81854ms; SamplesPerSecond = 1221 + Epoch[ 2 of 2]-Minibatch[ 171- 180 of 320]: * 2560; ce = 1.17241821; err = 0.35820313; TotalTime = 2.07758s; TotalTimePerSample = 0.81155ms; SamplesPerSecond = 1232 + Epoch[ 2 of 2]-Minibatch[ 181- 190 of 320]: * 2560; ce = 1.13457642; err = 0.35429688; TotalTime = 2.10773s; TotalTimePerSample = 0.82333ms; SamplesPerSecond = 1214 + Epoch[ 2 of 2]-Minibatch[ 191- 200 of 320]: * 2560; ce = 1.12700500; err = 0.35234375; TotalTime = 2.33236s; TotalTimePerSample = 0.91108ms; SamplesPerSecond = 1097 + Epoch[ 2 of 2]-Minibatch[ 201- 210 of 320]: * 2560; ce = 1.11282043; err = 0.33515625; TotalTime = 2.11595s; TotalTimePerSample = 0.82654ms; SamplesPerSecond = 1209 + Epoch[ 2 of 2]-Minibatch[ 211- 220 of 320]: * 2560; ce = 1.13374786; err = 0.34296875; TotalTime = 2.14508s; TotalTimePerSample = 0.83792ms; SamplesPerSecond = 1193 + Epoch[ 2 of 2]-Minibatch[ 221- 230 of 320]: * 2560; ce = 1.14316711; err = 0.35312500; TotalTime = 2.15834s; TotalTimePerSample = 0.84310ms; SamplesPerSecond = 1186 + Epoch[ 2 of 2]-Minibatch[ 231- 240 of 320]: * 2560; ce = 1.27329712; err = 0.38554688; TotalTime = 2.26755s; TotalTimePerSample = 0.88576ms; SamplesPerSecond = 1128 + Epoch[ 2 of 2]-Minibatch[ 241- 250 of 320]: * 2560; ce = 1.15661011; err = 0.34726563; TotalTime = 2.30714s; TotalTimePerSample = 0.90123ms; SamplesPerSecond = 1109 + Epoch[ 2 of 2]-Minibatch[ 251- 260 of 320]: * 2560; ce = 1.13043823; err = 0.34101562; TotalTime = 2.06456s; TotalTimePerSample = 0.80647ms; SamplesPerSecond = 1239 + Epoch[ 2 of 2]-Minibatch[ 261- 270 of 320]: * 2560; ce = 1.13791809; err = 0.34960938; TotalTime = 2.19548s; TotalTimePerSample = 0.85761ms; SamplesPerSecond = 1166 + Epoch[ 2 of 2]-Minibatch[ 271- 280 of 320]: * 2560; ce = 1.14694214; err = 0.34101562; TotalTime = 2.33209s; TotalTimePerSample = 0.91097ms; SamplesPerSecond = 1097 + Epoch[ 2 of 2]-Minibatch[ 281- 290 of 320]: * 2560; ce = 1.06658325; err = 0.33476563; TotalTime = 2.20537s; TotalTimePerSample = 0.86147ms; SamplesPerSecond = 1160 + Epoch[ 2 of 2]-Minibatch[ 291- 300 of 320]: * 2560; ce = 1.10054626; err = 0.33750000; TotalTime = 2.02138s; TotalTimePerSample = 0.78960ms; SamplesPerSecond = 1266 + Epoch[ 2 of 2]-Minibatch[ 301- 310 of 320]: * 2560; ce = 1.09098816; err = 0.34375000; TotalTime = 2.19745s; TotalTimePerSample = 0.85838ms; SamplesPerSecond = 1164 + Epoch[ 2 of 2]-Minibatch[ 311- 320 of 320]: * 2560; ce = 1.05595398; err = 0.33671875; TotalTime = 2.11208s; TotalTimePerSample = 0.82503ms; SamplesPerSecond = 1212 +Finished Epoch[ 2 of 2]: [Training] ce = 1.165754; err = 0.35567626; learningRatePerSample = 0.003125000047; EpochTime=68.751932 CNTKCommandTrainEnd: DPT_Pre1 @@ -1784,76 +1784,76 @@ minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms Starting minibatch loop. 
- Epoch[ 1 of 2]-Minibatch[ 1- 10 of 320]: * 2560; CrossEntropyWithSoftmax = 4.12742958; EvalErrorPrediction = 0.80507812; TotalTime = 3.39015s; TotalTimePerSample = 1.32428ms; SamplesPerSecond = 755 - Epoch[ 1 of 2]-Minibatch[ 11- 20 of 320]: * 2560; CrossEntropyWithSoftmax = 2.76509590; EvalErrorPrediction = 0.69960937; TotalTime = 3.25866s; TotalTimePerSample = 1.27292ms; SamplesPerSecond = 785 - Epoch[ 1 of 2]-Minibatch[ 21- 30 of 320]: * 2560; CrossEntropyWithSoftmax = 2.20613861; EvalErrorPrediction = 0.57812500; TotalTime = 3.30868s; TotalTimePerSample = 1.29245ms; SamplesPerSecond = 773 - Epoch[ 1 of 2]-Minibatch[ 31- 40 of 320]: * 2560; CrossEntropyWithSoftmax = 1.90078354; EvalErrorPrediction = 0.50898438; TotalTime = 3.24681s; TotalTimePerSample = 1.26828ms; SamplesPerSecond = 788 - Epoch[ 1 of 2]-Minibatch[ 41- 50 of 320]: * 2560; CrossEntropyWithSoftmax = 1.71711044; EvalErrorPrediction = 0.48710938; TotalTime = 3.34396s; TotalTimePerSample = 1.30623ms; SamplesPerSecond = 765 - Epoch[ 1 of 2]-Minibatch[ 51- 60 of 320]: * 2560; CrossEntropyWithSoftmax = 1.58837662; EvalErrorPrediction = 0.44726562; TotalTime = 3.54100s; TotalTimePerSample = 1.38320ms; SamplesPerSecond = 722 - Epoch[ 1 of 2]-Minibatch[ 61- 70 of 320]: * 2560; CrossEntropyWithSoftmax = 1.54870148; EvalErrorPrediction = 0.44296875; TotalTime = 3.27957s; TotalTimePerSample = 1.28108ms; SamplesPerSecond = 780 - Epoch[ 1 of 2]-Minibatch[ 71- 80 of 320]: * 2560; CrossEntropyWithSoftmax = 1.46472015; EvalErrorPrediction = 0.42773438; TotalTime = 3.39303s; TotalTimePerSample = 1.32540ms; SamplesPerSecond = 754 - Epoch[ 1 of 2]-Minibatch[ 81- 90 of 320]: * 2560; CrossEntropyWithSoftmax = 1.38026276; EvalErrorPrediction = 0.40273437; TotalTime = 3.33514s; TotalTimePerSample = 1.30279ms; SamplesPerSecond = 767 - Epoch[ 1 of 2]-Minibatch[ 91- 100 of 320]: * 2560; CrossEntropyWithSoftmax = 1.39969788; EvalErrorPrediction = 0.42148438; TotalTime = 3.28349s; TotalTimePerSample = 1.28262ms; SamplesPerSecond = 779 - Epoch[ 1 of 2]-Minibatch[ 101- 110 of 320]: * 2560; CrossEntropyWithSoftmax = 1.37001648; EvalErrorPrediction = 0.41796875; TotalTime = 3.17869s; TotalTimePerSample = 1.24168ms; SamplesPerSecond = 805 - Epoch[ 1 of 2]-Minibatch[ 111- 120 of 320]: * 2560; CrossEntropyWithSoftmax = 1.35947266; EvalErrorPrediction = 0.40820312; TotalTime = 3.54772s; TotalTimePerSample = 1.38583ms; SamplesPerSecond = 721 - Epoch[ 1 of 2]-Minibatch[ 121- 130 of 320]: * 2560; CrossEntropyWithSoftmax = 1.32850189; EvalErrorPrediction = 0.40468750; TotalTime = 3.44065s; TotalTimePerSample = 1.34401ms; SamplesPerSecond = 744 - Epoch[ 1 of 2]-Minibatch[ 131- 140 of 320]: * 2560; CrossEntropyWithSoftmax = 1.39116974; EvalErrorPrediction = 0.40703125; TotalTime = 3.30371s; TotalTimePerSample = 1.29051ms; SamplesPerSecond = 774 - Epoch[ 1 of 2]-Minibatch[ 141- 150 of 320]: * 2560; CrossEntropyWithSoftmax = 1.31802521; EvalErrorPrediction = 0.38437500; TotalTime = 3.36631s; TotalTimePerSample = 1.31496ms; SamplesPerSecond = 760 - Epoch[ 1 of 2]-Minibatch[ 151- 160 of 320]: * 2560; CrossEntropyWithSoftmax = 1.32752075; EvalErrorPrediction = 0.40117188; TotalTime = 3.14228s; TotalTimePerSample = 1.22745ms; SamplesPerSecond = 814 - Epoch[ 1 of 2]-Minibatch[ 161- 170 of 320]: * 2560; CrossEntropyWithSoftmax = 1.27012329; EvalErrorPrediction = 0.37929687; TotalTime = 3.23155s; TotalTimePerSample = 1.26233ms; SamplesPerSecond = 792 - Epoch[ 1 of 2]-Minibatch[ 171- 180 of 320]: * 2560; CrossEntropyWithSoftmax = 1.29055176; EvalErrorPrediction = 
0.38359375; TotalTime = 3.35835s; TotalTimePerSample = 1.31186ms; SamplesPerSecond = 762 - Epoch[ 1 of 2]-Minibatch[ 181- 190 of 320]: * 2560; CrossEntropyWithSoftmax = 1.29355164; EvalErrorPrediction = 0.38593750; TotalTime = 3.42075s; TotalTimePerSample = 1.33623ms; SamplesPerSecond = 748 - Epoch[ 1 of 2]-Minibatch[ 191- 200 of 320]: * 2560; CrossEntropyWithSoftmax = 1.27504883; EvalErrorPrediction = 0.38906250; TotalTime = 3.37690s; TotalTimePerSample = 1.31910ms; SamplesPerSecond = 758 - Epoch[ 1 of 2]-Minibatch[ 201- 210 of 320]: * 2560; CrossEntropyWithSoftmax = 1.27267761; EvalErrorPrediction = 0.39101562; TotalTime = 3.12080s; TotalTimePerSample = 1.21906ms; SamplesPerSecond = 820 - Epoch[ 1 of 2]-Minibatch[ 211- 220 of 320]: * 2560; CrossEntropyWithSoftmax = 1.21395569; EvalErrorPrediction = 0.36679688; TotalTime = 3.14198s; TotalTimePerSample = 1.22734ms; SamplesPerSecond = 814 - Epoch[ 1 of 2]-Minibatch[ 221- 230 of 320]: * 2560; CrossEntropyWithSoftmax = 1.20708313; EvalErrorPrediction = 0.36445312; TotalTime = 3.50516s; TotalTimePerSample = 1.36920ms; SamplesPerSecond = 730 - Epoch[ 1 of 2]-Minibatch[ 231- 240 of 320]: * 2560; CrossEntropyWithSoftmax = 1.25370178; EvalErrorPrediction = 0.38320312; TotalTime = 3.14201s; TotalTimePerSample = 1.22735ms; SamplesPerSecond = 814 - Epoch[ 1 of 2]-Minibatch[ 241- 250 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22307739; EvalErrorPrediction = 0.37500000; TotalTime = 3.13153s; TotalTimePerSample = 1.22325ms; SamplesPerSecond = 817 - Epoch[ 1 of 2]-Minibatch[ 251- 260 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14909363; EvalErrorPrediction = 0.35234375; TotalTime = 3.41643s; TotalTimePerSample = 1.33454ms; SamplesPerSecond = 749 - Epoch[ 1 of 2]-Minibatch[ 261- 270 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17035828; EvalErrorPrediction = 0.35937500; TotalTime = 3.22442s; TotalTimePerSample = 1.25954ms; SamplesPerSecond = 793 - Epoch[ 1 of 2]-Minibatch[ 271- 280 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22515869; EvalErrorPrediction = 0.36875000; TotalTime = 3.47776s; TotalTimePerSample = 1.35850ms; SamplesPerSecond = 736 - Epoch[ 1 of 2]-Minibatch[ 281- 290 of 320]: * 2560; CrossEntropyWithSoftmax = 1.16166687; EvalErrorPrediction = 0.35664062; TotalTime = 2.99925s; TotalTimePerSample = 1.17158ms; SamplesPerSecond = 853 - Epoch[ 1 of 2]-Minibatch[ 291- 300 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18268433; EvalErrorPrediction = 0.35820313; TotalTime = 3.23501s; TotalTimePerSample = 1.26368ms; SamplesPerSecond = 791 - Epoch[ 1 of 2]-Minibatch[ 301- 310 of 320]: * 2560; CrossEntropyWithSoftmax = 1.16473083; EvalErrorPrediction = 0.35195312; TotalTime = 3.21572s; TotalTimePerSample = 1.25614ms; SamplesPerSecond = 796 - Epoch[ 1 of 2]-Minibatch[ 311- 320 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17591248; EvalErrorPrediction = 0.35195312; TotalTime = 3.07276s; TotalTimePerSample = 1.20030ms; SamplesPerSecond = 833 -Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.491325; EvalErrorPrediction = 0.42381594; learningRatePerSample = 0.003125000047; EpochTime=106.67356 + Epoch[ 1 of 2]-Minibatch[ 1- 10 of 320]: * 2560; ce = 4.12742958; err = 0.80507812; TotalTime = 3.39015s; TotalTimePerSample = 1.32428ms; SamplesPerSecond = 755 + Epoch[ 1 of 2]-Minibatch[ 11- 20 of 320]: * 2560; ce = 2.76509590; err = 0.69960937; TotalTime = 3.25866s; TotalTimePerSample = 1.27292ms; SamplesPerSecond = 785 + Epoch[ 1 of 2]-Minibatch[ 21- 30 of 320]: * 2560; ce = 2.20613861; err = 0.57812500; TotalTime = 3.30868s; TotalTimePerSample = 
1.29245ms; SamplesPerSecond = 773 + Epoch[ 1 of 2]-Minibatch[ 31- 40 of 320]: * 2560; ce = 1.90078354; err = 0.50898438; TotalTime = 3.24681s; TotalTimePerSample = 1.26828ms; SamplesPerSecond = 788 + Epoch[ 1 of 2]-Minibatch[ 41- 50 of 320]: * 2560; ce = 1.71711044; err = 0.48710938; TotalTime = 3.34396s; TotalTimePerSample = 1.30623ms; SamplesPerSecond = 765 + Epoch[ 1 of 2]-Minibatch[ 51- 60 of 320]: * 2560; ce = 1.58837662; err = 0.44726562; TotalTime = 3.54100s; TotalTimePerSample = 1.38320ms; SamplesPerSecond = 722 + Epoch[ 1 of 2]-Minibatch[ 61- 70 of 320]: * 2560; ce = 1.54870148; err = 0.44296875; TotalTime = 3.27957s; TotalTimePerSample = 1.28108ms; SamplesPerSecond = 780 + Epoch[ 1 of 2]-Minibatch[ 71- 80 of 320]: * 2560; ce = 1.46472015; err = 0.42773438; TotalTime = 3.39303s; TotalTimePerSample = 1.32540ms; SamplesPerSecond = 754 + Epoch[ 1 of 2]-Minibatch[ 81- 90 of 320]: * 2560; ce = 1.38026276; err = 0.40273437; TotalTime = 3.33514s; TotalTimePerSample = 1.30279ms; SamplesPerSecond = 767 + Epoch[ 1 of 2]-Minibatch[ 91- 100 of 320]: * 2560; ce = 1.39969788; err = 0.42148438; TotalTime = 3.28349s; TotalTimePerSample = 1.28262ms; SamplesPerSecond = 779 + Epoch[ 1 of 2]-Minibatch[ 101- 110 of 320]: * 2560; ce = 1.37001648; err = 0.41796875; TotalTime = 3.17869s; TotalTimePerSample = 1.24168ms; SamplesPerSecond = 805 + Epoch[ 1 of 2]-Minibatch[ 111- 120 of 320]: * 2560; ce = 1.35947266; err = 0.40820312; TotalTime = 3.54772s; TotalTimePerSample = 1.38583ms; SamplesPerSecond = 721 + Epoch[ 1 of 2]-Minibatch[ 121- 130 of 320]: * 2560; ce = 1.32850189; err = 0.40468750; TotalTime = 3.44065s; TotalTimePerSample = 1.34401ms; SamplesPerSecond = 744 + Epoch[ 1 of 2]-Minibatch[ 131- 140 of 320]: * 2560; ce = 1.39116974; err = 0.40703125; TotalTime = 3.30371s; TotalTimePerSample = 1.29051ms; SamplesPerSecond = 774 + Epoch[ 1 of 2]-Minibatch[ 141- 150 of 320]: * 2560; ce = 1.31802521; err = 0.38437500; TotalTime = 3.36631s; TotalTimePerSample = 1.31496ms; SamplesPerSecond = 760 + Epoch[ 1 of 2]-Minibatch[ 151- 160 of 320]: * 2560; ce = 1.32752075; err = 0.40117188; TotalTime = 3.14228s; TotalTimePerSample = 1.22745ms; SamplesPerSecond = 814 + Epoch[ 1 of 2]-Minibatch[ 161- 170 of 320]: * 2560; ce = 1.27012329; err = 0.37929687; TotalTime = 3.23155s; TotalTimePerSample = 1.26233ms; SamplesPerSecond = 792 + Epoch[ 1 of 2]-Minibatch[ 171- 180 of 320]: * 2560; ce = 1.29055176; err = 0.38359375; TotalTime = 3.35835s; TotalTimePerSample = 1.31186ms; SamplesPerSecond = 762 + Epoch[ 1 of 2]-Minibatch[ 181- 190 of 320]: * 2560; ce = 1.29355164; err = 0.38593750; TotalTime = 3.42075s; TotalTimePerSample = 1.33623ms; SamplesPerSecond = 748 + Epoch[ 1 of 2]-Minibatch[ 191- 200 of 320]: * 2560; ce = 1.27504883; err = 0.38906250; TotalTime = 3.37690s; TotalTimePerSample = 1.31910ms; SamplesPerSecond = 758 + Epoch[ 1 of 2]-Minibatch[ 201- 210 of 320]: * 2560; ce = 1.27267761; err = 0.39101562; TotalTime = 3.12080s; TotalTimePerSample = 1.21906ms; SamplesPerSecond = 820 + Epoch[ 1 of 2]-Minibatch[ 211- 220 of 320]: * 2560; ce = 1.21395569; err = 0.36679688; TotalTime = 3.14198s; TotalTimePerSample = 1.22734ms; SamplesPerSecond = 814 + Epoch[ 1 of 2]-Minibatch[ 221- 230 of 320]: * 2560; ce = 1.20708313; err = 0.36445312; TotalTime = 3.50516s; TotalTimePerSample = 1.36920ms; SamplesPerSecond = 730 + Epoch[ 1 of 2]-Minibatch[ 231- 240 of 320]: * 2560; ce = 1.25370178; err = 0.38320312; TotalTime = 3.14201s; TotalTimePerSample = 1.22735ms; SamplesPerSecond = 814 + Epoch[ 1 of 2]-Minibatch[ 241- 250 of 320]: * 
2560; ce = 1.22307739; err = 0.37500000; TotalTime = 3.13153s; TotalTimePerSample = 1.22325ms; SamplesPerSecond = 817 + Epoch[ 1 of 2]-Minibatch[ 251- 260 of 320]: * 2560; ce = 1.14909363; err = 0.35234375; TotalTime = 3.41643s; TotalTimePerSample = 1.33454ms; SamplesPerSecond = 749 + Epoch[ 1 of 2]-Minibatch[ 261- 270 of 320]: * 2560; ce = 1.17035828; err = 0.35937500; TotalTime = 3.22442s; TotalTimePerSample = 1.25954ms; SamplesPerSecond = 793 + Epoch[ 1 of 2]-Minibatch[ 271- 280 of 320]: * 2560; ce = 1.22515869; err = 0.36875000; TotalTime = 3.47776s; TotalTimePerSample = 1.35850ms; SamplesPerSecond = 736 + Epoch[ 1 of 2]-Minibatch[ 281- 290 of 320]: * 2560; ce = 1.16166687; err = 0.35664062; TotalTime = 2.99925s; TotalTimePerSample = 1.17158ms; SamplesPerSecond = 853 + Epoch[ 1 of 2]-Minibatch[ 291- 300 of 320]: * 2560; ce = 1.18268433; err = 0.35820313; TotalTime = 3.23501s; TotalTimePerSample = 1.26368ms; SamplesPerSecond = 791 + Epoch[ 1 of 2]-Minibatch[ 301- 310 of 320]: * 2560; ce = 1.16473083; err = 0.35195312; TotalTime = 3.21572s; TotalTimePerSample = 1.25614ms; SamplesPerSecond = 796 + Epoch[ 1 of 2]-Minibatch[ 311- 320 of 320]: * 2560; ce = 1.17591248; err = 0.35195312; TotalTime = 3.07276s; TotalTimePerSample = 1.20030ms; SamplesPerSecond = 833 +Finished Epoch[ 1 of 2]: [Training] ce = 1.491325; err = 0.42381594; learningRatePerSample = 0.003125000047; EpochTime=106.67356 Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses Starting minibatch loop. - Epoch[ 2 of 2]-Minibatch[ 1- 10 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14145651; EvalErrorPrediction = 0.35078125; TotalTime = 3.20015s; TotalTimePerSample = 1.25006ms; SamplesPerSecond = 799 - Epoch[ 2 of 2]-Minibatch[ 11- 20 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17236452; EvalErrorPrediction = 0.35703125; TotalTime = 3.58645s; TotalTimePerSample = 1.40096ms; SamplesPerSecond = 713 - Epoch[ 2 of 2]-Minibatch[ 21- 30 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23701782; EvalErrorPrediction = 0.37890625; TotalTime = 3.13229s; TotalTimePerSample = 1.22355ms; SamplesPerSecond = 817 - Epoch[ 2 of 2]-Minibatch[ 31- 40 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18644638; EvalErrorPrediction = 0.36132812; TotalTime = 3.23432s; TotalTimePerSample = 1.26341ms; SamplesPerSecond = 791 - Epoch[ 2 of 2]-Minibatch[ 41- 50 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12840500; EvalErrorPrediction = 0.34140625; TotalTime = 3.31180s; TotalTimePerSample = 1.29367ms; SamplesPerSecond = 772 - Epoch[ 2 of 2]-Minibatch[ 51- 60 of 320]: * 2560; CrossEntropyWithSoftmax = 1.21571541; EvalErrorPrediction = 0.37031250; TotalTime = 3.27370s; TotalTimePerSample = 1.27879ms; SamplesPerSecond = 781 - Epoch[ 2 of 2]-Minibatch[ 61- 70 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14553375; EvalErrorPrediction = 0.34257813; TotalTime = 3.35806s; TotalTimePerSample = 1.31174ms; SamplesPerSecond = 762 - Epoch[ 2 of 2]-Minibatch[ 71- 80 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12870712; EvalErrorPrediction = 0.34453125; TotalTime = 3.07358s; TotalTimePerSample = 1.20062ms; SamplesPerSecond = 832 - Epoch[ 2 of 2]-Minibatch[ 81- 90 of 320]: * 2560; CrossEntropyWithSoftmax = 1.15200119; EvalErrorPrediction = 0.34179688; TotalTime = 3.26694s; TotalTimePerSample = 1.27615ms; SamplesPerSecond = 783 - Epoch[ 2 of 2]-Minibatch[ 91- 100 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12955780; 
EvalErrorPrediction = 0.35312500; TotalTime = 3.26072s; TotalTimePerSample = 1.27372ms; SamplesPerSecond = 785 - Epoch[ 2 of 2]-Minibatch[ 101- 110 of 320]: * 2560; CrossEntropyWithSoftmax = 1.15128708; EvalErrorPrediction = 0.34414062; TotalTime = 3.13385s; TotalTimePerSample = 1.22416ms; SamplesPerSecond = 816 - Epoch[ 2 of 2]-Minibatch[ 111- 120 of 320]: * 2560; CrossEntropyWithSoftmax = 1.13170090; EvalErrorPrediction = 0.34414062; TotalTime = 3.24222s; TotalTimePerSample = 1.26649ms; SamplesPerSecond = 789 - Epoch[ 2 of 2]-Minibatch[ 121- 130 of 320]: * 2560; CrossEntropyWithSoftmax = 1.10571136; EvalErrorPrediction = 0.34296875; TotalTime = 3.26617s; TotalTimePerSample = 1.27585ms; SamplesPerSecond = 783 - Epoch[ 2 of 2]-Minibatch[ 131- 140 of 320]: * 2560; CrossEntropyWithSoftmax = 1.06392975; EvalErrorPrediction = 0.32695313; TotalTime = 2.93591s; TotalTimePerSample = 1.14684ms; SamplesPerSecond = 871 - Epoch[ 2 of 2]-Minibatch[ 141- 150 of 320]: * 2560; CrossEntropyWithSoftmax = 1.05555420; EvalErrorPrediction = 0.31835938; TotalTime = 3.06553s; TotalTimePerSample = 1.19747ms; SamplesPerSecond = 835 - Epoch[ 2 of 2]-Minibatch[ 151- 160 of 320]: * 2560; CrossEntropyWithSoftmax = 1.06729126; EvalErrorPrediction = 0.32460937; TotalTime = 3.01623s; TotalTimePerSample = 1.17822ms; SamplesPerSecond = 848 - Epoch[ 2 of 2]-Minibatch[ 161- 170 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14530792; EvalErrorPrediction = 0.34687500; TotalTime = 3.26534s; TotalTimePerSample = 1.27552ms; SamplesPerSecond = 783 - Epoch[ 2 of 2]-Minibatch[ 171- 180 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14569397; EvalErrorPrediction = 0.35625000; TotalTime = 3.23646s; TotalTimePerSample = 1.26424ms; SamplesPerSecond = 790 - Epoch[ 2 of 2]-Minibatch[ 181- 190 of 320]: * 2560; CrossEntropyWithSoftmax = 1.07375793; EvalErrorPrediction = 0.32929687; TotalTime = 3.50725s; TotalTimePerSample = 1.37002ms; SamplesPerSecond = 729 - Epoch[ 2 of 2]-Minibatch[ 191- 200 of 320]: * 2560; CrossEntropyWithSoftmax = 1.07886505; EvalErrorPrediction = 0.33671875; TotalTime = 3.40044s; TotalTimePerSample = 1.32829ms; SamplesPerSecond = 752 - Epoch[ 2 of 2]-Minibatch[ 201- 210 of 320]: * 2560; CrossEntropyWithSoftmax = 1.06901398; EvalErrorPrediction = 0.33164063; TotalTime = 3.25460s; TotalTimePerSample = 1.27133ms; SamplesPerSecond = 786 - Epoch[ 2 of 2]-Minibatch[ 211- 220 of 320]: * 2560; CrossEntropyWithSoftmax = 1.09929962; EvalErrorPrediction = 0.33437500; TotalTime = 3.16798s; TotalTimePerSample = 1.23749ms; SamplesPerSecond = 808 - Epoch[ 2 of 2]-Minibatch[ 221- 230 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12124939; EvalErrorPrediction = 0.34531250; TotalTime = 3.56923s; TotalTimePerSample = 1.39423ms; SamplesPerSecond = 717 - Epoch[ 2 of 2]-Minibatch[ 231- 240 of 320]: * 2560; CrossEntropyWithSoftmax = 1.13580627; EvalErrorPrediction = 0.35937500; TotalTime = 3.10338s; TotalTimePerSample = 1.21226ms; SamplesPerSecond = 824 - Epoch[ 2 of 2]-Minibatch[ 241- 250 of 320]: * 2560; CrossEntropyWithSoftmax = 1.09370117; EvalErrorPrediction = 0.33515625; TotalTime = 3.33700s; TotalTimePerSample = 1.30352ms; SamplesPerSecond = 767 - Epoch[ 2 of 2]-Minibatch[ 251- 260 of 320]: * 2560; CrossEntropyWithSoftmax = 1.07206116; EvalErrorPrediction = 0.33359375; TotalTime = 3.24466s; TotalTimePerSample = 1.26745ms; SamplesPerSecond = 788 - Epoch[ 2 of 2]-Minibatch[ 261- 270 of 320]: * 2560; CrossEntropyWithSoftmax = 1.06741028; EvalErrorPrediction = 0.33359375; TotalTime = 3.38665s; TotalTimePerSample = 1.32291ms; 
SamplesPerSecond = 755 - Epoch[ 2 of 2]-Minibatch[ 271- 280 of 320]: * 2560; CrossEntropyWithSoftmax = 1.10143433; EvalErrorPrediction = 0.32734375; TotalTime = 3.31745s; TotalTimePerSample = 1.29588ms; SamplesPerSecond = 771 - Epoch[ 2 of 2]-Minibatch[ 281- 290 of 320]: * 2560; CrossEntropyWithSoftmax = 1.02370911; EvalErrorPrediction = 0.31757812; TotalTime = 3.08865s; TotalTimePerSample = 1.20650ms; SamplesPerSecond = 828 - Epoch[ 2 of 2]-Minibatch[ 291- 300 of 320]: * 2560; CrossEntropyWithSoftmax = 1.08736572; EvalErrorPrediction = 0.33984375; TotalTime = 3.50527s; TotalTimePerSample = 1.36924ms; SamplesPerSecond = 730 - Epoch[ 2 of 2]-Minibatch[ 301- 310 of 320]: * 2560; CrossEntropyWithSoftmax = 1.06789551; EvalErrorPrediction = 0.32890625; TotalTime = 3.27224s; TotalTimePerSample = 1.27822ms; SamplesPerSecond = 782 - Epoch[ 2 of 2]-Minibatch[ 311- 320 of 320]: * 2560; CrossEntropyWithSoftmax = 1.05290527; EvalErrorPrediction = 0.33398438; TotalTime = 3.47792s; TotalTimePerSample = 1.35856ms; SamplesPerSecond = 736 -Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.114005; EvalErrorPrediction = 0.3416504; learningRatePerSample = 0.003125000047; EpochTime=104.51539 + Epoch[ 2 of 2]-Minibatch[ 1- 10 of 320]: * 2560; ce = 1.14145651; err = 0.35078125; TotalTime = 3.20015s; TotalTimePerSample = 1.25006ms; SamplesPerSecond = 799 + Epoch[ 2 of 2]-Minibatch[ 11- 20 of 320]: * 2560; ce = 1.17236452; err = 0.35703125; TotalTime = 3.58645s; TotalTimePerSample = 1.40096ms; SamplesPerSecond = 713 + Epoch[ 2 of 2]-Minibatch[ 21- 30 of 320]: * 2560; ce = 1.23701782; err = 0.37890625; TotalTime = 3.13229s; TotalTimePerSample = 1.22355ms; SamplesPerSecond = 817 + Epoch[ 2 of 2]-Minibatch[ 31- 40 of 320]: * 2560; ce = 1.18644638; err = 0.36132812; TotalTime = 3.23432s; TotalTimePerSample = 1.26341ms; SamplesPerSecond = 791 + Epoch[ 2 of 2]-Minibatch[ 41- 50 of 320]: * 2560; ce = 1.12840500; err = 0.34140625; TotalTime = 3.31180s; TotalTimePerSample = 1.29367ms; SamplesPerSecond = 772 + Epoch[ 2 of 2]-Minibatch[ 51- 60 of 320]: * 2560; ce = 1.21571541; err = 0.37031250; TotalTime = 3.27370s; TotalTimePerSample = 1.27879ms; SamplesPerSecond = 781 + Epoch[ 2 of 2]-Minibatch[ 61- 70 of 320]: * 2560; ce = 1.14553375; err = 0.34257813; TotalTime = 3.35806s; TotalTimePerSample = 1.31174ms; SamplesPerSecond = 762 + Epoch[ 2 of 2]-Minibatch[ 71- 80 of 320]: * 2560; ce = 1.12870712; err = 0.34453125; TotalTime = 3.07358s; TotalTimePerSample = 1.20062ms; SamplesPerSecond = 832 + Epoch[ 2 of 2]-Minibatch[ 81- 90 of 320]: * 2560; ce = 1.15200119; err = 0.34179688; TotalTime = 3.26694s; TotalTimePerSample = 1.27615ms; SamplesPerSecond = 783 + Epoch[ 2 of 2]-Minibatch[ 91- 100 of 320]: * 2560; ce = 1.12955780; err = 0.35312500; TotalTime = 3.26072s; TotalTimePerSample = 1.27372ms; SamplesPerSecond = 785 + Epoch[ 2 of 2]-Minibatch[ 101- 110 of 320]: * 2560; ce = 1.15128708; err = 0.34414062; TotalTime = 3.13385s; TotalTimePerSample = 1.22416ms; SamplesPerSecond = 816 + Epoch[ 2 of 2]-Minibatch[ 111- 120 of 320]: * 2560; ce = 1.13170090; err = 0.34414062; TotalTime = 3.24222s; TotalTimePerSample = 1.26649ms; SamplesPerSecond = 789 + Epoch[ 2 of 2]-Minibatch[ 121- 130 of 320]: * 2560; ce = 1.10571136; err = 0.34296875; TotalTime = 3.26617s; TotalTimePerSample = 1.27585ms; SamplesPerSecond = 783 + Epoch[ 2 of 2]-Minibatch[ 131- 140 of 320]: * 2560; ce = 1.06392975; err = 0.32695313; TotalTime = 2.93591s; TotalTimePerSample = 1.14684ms; SamplesPerSecond = 871 + Epoch[ 2 of 2]-Minibatch[ 141- 150 of 320]: 
* 2560; ce = 1.05555420; err = 0.31835938; TotalTime = 3.06553s; TotalTimePerSample = 1.19747ms; SamplesPerSecond = 835 + Epoch[ 2 of 2]-Minibatch[ 151- 160 of 320]: * 2560; ce = 1.06729126; err = 0.32460937; TotalTime = 3.01623s; TotalTimePerSample = 1.17822ms; SamplesPerSecond = 848 + Epoch[ 2 of 2]-Minibatch[ 161- 170 of 320]: * 2560; ce = 1.14530792; err = 0.34687500; TotalTime = 3.26534s; TotalTimePerSample = 1.27552ms; SamplesPerSecond = 783 + Epoch[ 2 of 2]-Minibatch[ 171- 180 of 320]: * 2560; ce = 1.14569397; err = 0.35625000; TotalTime = 3.23646s; TotalTimePerSample = 1.26424ms; SamplesPerSecond = 790 + Epoch[ 2 of 2]-Minibatch[ 181- 190 of 320]: * 2560; ce = 1.07375793; err = 0.32929687; TotalTime = 3.50725s; TotalTimePerSample = 1.37002ms; SamplesPerSecond = 729 + Epoch[ 2 of 2]-Minibatch[ 191- 200 of 320]: * 2560; ce = 1.07886505; err = 0.33671875; TotalTime = 3.40044s; TotalTimePerSample = 1.32829ms; SamplesPerSecond = 752 + Epoch[ 2 of 2]-Minibatch[ 201- 210 of 320]: * 2560; ce = 1.06901398; err = 0.33164063; TotalTime = 3.25460s; TotalTimePerSample = 1.27133ms; SamplesPerSecond = 786 + Epoch[ 2 of 2]-Minibatch[ 211- 220 of 320]: * 2560; ce = 1.09929962; err = 0.33437500; TotalTime = 3.16798s; TotalTimePerSample = 1.23749ms; SamplesPerSecond = 808 + Epoch[ 2 of 2]-Minibatch[ 221- 230 of 320]: * 2560; ce = 1.12124939; err = 0.34531250; TotalTime = 3.56923s; TotalTimePerSample = 1.39423ms; SamplesPerSecond = 717 + Epoch[ 2 of 2]-Minibatch[ 231- 240 of 320]: * 2560; ce = 1.13580627; err = 0.35937500; TotalTime = 3.10338s; TotalTimePerSample = 1.21226ms; SamplesPerSecond = 824 + Epoch[ 2 of 2]-Minibatch[ 241- 250 of 320]: * 2560; ce = 1.09370117; err = 0.33515625; TotalTime = 3.33700s; TotalTimePerSample = 1.30352ms; SamplesPerSecond = 767 + Epoch[ 2 of 2]-Minibatch[ 251- 260 of 320]: * 2560; ce = 1.07206116; err = 0.33359375; TotalTime = 3.24466s; TotalTimePerSample = 1.26745ms; SamplesPerSecond = 788 + Epoch[ 2 of 2]-Minibatch[ 261- 270 of 320]: * 2560; ce = 1.06741028; err = 0.33359375; TotalTime = 3.38665s; TotalTimePerSample = 1.32291ms; SamplesPerSecond = 755 + Epoch[ 2 of 2]-Minibatch[ 271- 280 of 320]: * 2560; ce = 1.10143433; err = 0.32734375; TotalTime = 3.31745s; TotalTimePerSample = 1.29588ms; SamplesPerSecond = 771 + Epoch[ 2 of 2]-Minibatch[ 281- 290 of 320]: * 2560; ce = 1.02370911; err = 0.31757812; TotalTime = 3.08865s; TotalTimePerSample = 1.20650ms; SamplesPerSecond = 828 + Epoch[ 2 of 2]-Minibatch[ 291- 300 of 320]: * 2560; ce = 1.08736572; err = 0.33984375; TotalTime = 3.50527s; TotalTimePerSample = 1.36924ms; SamplesPerSecond = 730 + Epoch[ 2 of 2]-Minibatch[ 301- 310 of 320]: * 2560; ce = 1.06789551; err = 0.32890625; TotalTime = 3.27224s; TotalTimePerSample = 1.27822ms; SamplesPerSecond = 782 + Epoch[ 2 of 2]-Minibatch[ 311- 320 of 320]: * 2560; ce = 1.05290527; err = 0.33398438; TotalTime = 3.47792s; TotalTimePerSample = 1.35856ms; SamplesPerSecond = 736 +Finished Epoch[ 2 of 2]: [Training] ce = 1.114005; err = 0.3416504; learningRatePerSample = 0.003125000047; EpochTime=104.51539 CNTKCommandTrainEnd: DPT_Pre2 @@ -3168,101 +3168,101 @@ minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms Starting minibatch loop. 
- Epoch[ 1 of 4]-Minibatch[ 1- 10 of 320]: * 2560; CrossEntropyWithSoftmax = 4.01592903; EvalErrorPrediction = 0.82421875; TotalTime = 3.95135s; TotalTimePerSample = 1.54350ms; SamplesPerSecond = 647 - Epoch[ 1 of 4]-Minibatch[ 11- 20 of 320]: * 2560; CrossEntropyWithSoftmax = 2.63751793; EvalErrorPrediction = 0.63789063; TotalTime = 1.56999s; TotalTimePerSample = 0.61328ms; SamplesPerSecond = 1630 - Epoch[ 1 of 4]-Minibatch[ 21- 30 of 320]: * 2560; CrossEntropyWithSoftmax = 2.03013535; EvalErrorPrediction = 0.54843750; TotalTime = 1.09485s; TotalTimePerSample = 0.42768ms; SamplesPerSecond = 2338 - Epoch[ 1 of 4]-Minibatch[ 31- 40 of 320]: * 2560; CrossEntropyWithSoftmax = 1.74010468; EvalErrorPrediction = 0.47148438; TotalTime = 1.14739s; TotalTimePerSample = 0.44820ms; SamplesPerSecond = 2231 - Epoch[ 1 of 4]-Minibatch[ 41- 50 of 320]: * 2560; CrossEntropyWithSoftmax = 1.56799011; EvalErrorPrediction = 0.44335938; TotalTime = 1.09577s; TotalTimePerSample = 0.42803ms; SamplesPerSecond = 2336 - Epoch[ 1 of 4]-Minibatch[ 51- 60 of 320]: * 2560; CrossEntropyWithSoftmax = 1.46781845; EvalErrorPrediction = 0.41328125; TotalTime = 1.09510s; TotalTimePerSample = 0.42777ms; SamplesPerSecond = 2337 - Epoch[ 1 of 4]-Minibatch[ 61- 70 of 320]: * 2560; CrossEntropyWithSoftmax = 1.43012238; EvalErrorPrediction = 0.40585938; TotalTime = 1.09635s; TotalTimePerSample = 0.42826ms; SamplesPerSecond = 2335 - Epoch[ 1 of 4]-Minibatch[ 71- 80 of 320]: * 2560; CrossEntropyWithSoftmax = 1.35918579; EvalErrorPrediction = 0.40039062; TotalTime = 1.09548s; TotalTimePerSample = 0.42792ms; SamplesPerSecond = 2336 - Epoch[ 1 of 4]-Minibatch[ 81- 90 of 320]: * 2560; CrossEntropyWithSoftmax = 1.28387451; EvalErrorPrediction = 0.37812500; TotalTime = 1.09595s; TotalTimePerSample = 0.42811ms; SamplesPerSecond = 2335 - Epoch[ 1 of 4]-Minibatch[ 91- 100 of 320]: * 2560; CrossEntropyWithSoftmax = 1.29969482; EvalErrorPrediction = 0.39726563; TotalTime = 1.09572s; TotalTimePerSample = 0.42802ms; SamplesPerSecond = 2336 - Epoch[ 1 of 4]-Minibatch[ 101- 110 of 320]: * 2560; CrossEntropyWithSoftmax = 1.27851257; EvalErrorPrediction = 0.38945313; TotalTime = 1.09571s; TotalTimePerSample = 0.42801ms; SamplesPerSecond = 2336 - Epoch[ 1 of 4]-Minibatch[ 111- 120 of 320]: * 2560; CrossEntropyWithSoftmax = 1.27491150; EvalErrorPrediction = 0.38476562; TotalTime = 1.12142s; TotalTimePerSample = 0.43806ms; SamplesPerSecond = 2282 - Epoch[ 1 of 4]-Minibatch[ 121- 130 of 320]: * 2560; CrossEntropyWithSoftmax = 1.24143066; EvalErrorPrediction = 0.38164063; TotalTime = 1.09592s; TotalTimePerSample = 0.42809ms; SamplesPerSecond = 2335 - Epoch[ 1 of 4]-Minibatch[ 131- 140 of 320]: * 2560; CrossEntropyWithSoftmax = 1.31309967; EvalErrorPrediction = 0.38867188; TotalTime = 1.09608s; TotalTimePerSample = 0.42816ms; SamplesPerSecond = 2335 - Epoch[ 1 of 4]-Minibatch[ 141- 150 of 320]: * 2560; CrossEntropyWithSoftmax = 1.24519043; EvalErrorPrediction = 0.36054687; TotalTime = 1.09577s; TotalTimePerSample = 0.42804ms; SamplesPerSecond = 2336 - Epoch[ 1 of 4]-Minibatch[ 151- 160 of 320]: * 2560; CrossEntropyWithSoftmax = 1.26173248; EvalErrorPrediction = 0.38554688; TotalTime = 1.13408s; TotalTimePerSample = 0.44300ms; SamplesPerSecond = 2257 - Epoch[ 1 of 4]-Minibatch[ 161- 170 of 320]: * 2560; CrossEntropyWithSoftmax = 1.20199585; EvalErrorPrediction = 0.35390625; TotalTime = 1.11229s; TotalTimePerSample = 0.43449ms; SamplesPerSecond = 2301 - Epoch[ 1 of 4]-Minibatch[ 171- 180 of 320]: * 2560; CrossEntropyWithSoftmax = 1.21707153; 
EvalErrorPrediction = 0.37031250; TotalTime = 1.09594s; TotalTimePerSample = 0.42810ms; SamplesPerSecond = 2335 - Epoch[ 1 of 4]-Minibatch[ 181- 190 of 320]: * 2560; CrossEntropyWithSoftmax = 1.21532898; EvalErrorPrediction = 0.37382813; TotalTime = 1.09629s; TotalTimePerSample = 0.42824ms; SamplesPerSecond = 2335 - Epoch[ 1 of 4]-Minibatch[ 191- 200 of 320]: * 2560; CrossEntropyWithSoftmax = 1.20324097; EvalErrorPrediction = 0.37187500; TotalTime = 1.10557s; TotalTimePerSample = 0.43186ms; SamplesPerSecond = 2315 - Epoch[ 1 of 4]-Minibatch[ 201- 210 of 320]: * 2560; CrossEntropyWithSoftmax = 1.20709839; EvalErrorPrediction = 0.37343750; TotalTime = 1.09610s; TotalTimePerSample = 0.42816ms; SamplesPerSecond = 2335 - Epoch[ 1 of 4]-Minibatch[ 211- 220 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14372253; EvalErrorPrediction = 0.33828125; TotalTime = 1.09640s; TotalTimePerSample = 0.42828ms; SamplesPerSecond = 2334 - Epoch[ 1 of 4]-Minibatch[ 221- 230 of 320]: * 2560; CrossEntropyWithSoftmax = 1.15016785; EvalErrorPrediction = 0.34492187; TotalTime = 1.09523s; TotalTimePerSample = 0.42782ms; SamplesPerSecond = 2337 - Epoch[ 1 of 4]-Minibatch[ 231- 240 of 320]: * 2560; CrossEntropyWithSoftmax = 1.19444275; EvalErrorPrediction = 0.35546875; TotalTime = 1.09550s; TotalTimePerSample = 0.42793ms; SamplesPerSecond = 2336 - Epoch[ 1 of 4]-Minibatch[ 241- 250 of 320]: * 2560; CrossEntropyWithSoftmax = 1.16820984; EvalErrorPrediction = 0.35937500; TotalTime = 1.10647s; TotalTimePerSample = 0.43222ms; SamplesPerSecond = 2313 - Epoch[ 1 of 4]-Minibatch[ 251- 260 of 320]: * 2560; CrossEntropyWithSoftmax = 1.09052429; EvalErrorPrediction = 0.34062500; TotalTime = 1.11512s; TotalTimePerSample = 0.43559ms; SamplesPerSecond = 2295 - Epoch[ 1 of 4]-Minibatch[ 261- 270 of 320]: * 2560; CrossEntropyWithSoftmax = 1.11017761; EvalErrorPrediction = 0.34140625; TotalTime = 1.09791s; TotalTimePerSample = 0.42887ms; SamplesPerSecond = 2331 - Epoch[ 1 of 4]-Minibatch[ 271- 280 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17647095; EvalErrorPrediction = 0.34960938; TotalTime = 1.09768s; TotalTimePerSample = 0.42878ms; SamplesPerSecond = 2332 - Epoch[ 1 of 4]-Minibatch[ 281- 290 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12259827; EvalErrorPrediction = 0.34960938; TotalTime = 1.09812s; TotalTimePerSample = 0.42895ms; SamplesPerSecond = 2331 - Epoch[ 1 of 4]-Minibatch[ 291- 300 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14627686; EvalErrorPrediction = 0.34882812; TotalTime = 1.09796s; TotalTimePerSample = 0.42889ms; SamplesPerSecond = 2331 - Epoch[ 1 of 4]-Minibatch[ 301- 310 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12782593; EvalErrorPrediction = 0.34296875; TotalTime = 1.11678s; TotalTimePerSample = 0.43624ms; SamplesPerSecond = 2292 - Epoch[ 1 of 4]-Minibatch[ 311- 320 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12669983; EvalErrorPrediction = 0.34453125; TotalTime = 1.08905s; TotalTimePerSample = 0.42541ms; SamplesPerSecond = 2350 -Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 1.4077845; EvalErrorPrediction = 0.40218505; learningRatePerSample = 0.003125000047; EpochTime=39.654781 + Epoch[ 1 of 4]-Minibatch[ 1- 10 of 320]: * 2560; ce = 4.01592903; err = 0.82421875; TotalTime = 3.95135s; TotalTimePerSample = 1.54350ms; SamplesPerSecond = 647 + Epoch[ 1 of 4]-Minibatch[ 11- 20 of 320]: * 2560; ce = 2.63751793; err = 0.63789063; TotalTime = 1.56999s; TotalTimePerSample = 0.61328ms; SamplesPerSecond = 1630 + Epoch[ 1 of 4]-Minibatch[ 21- 30 of 320]: * 2560; ce = 2.03013535; err = 0.54843750; 
TotalTime = 1.09485s; TotalTimePerSample = 0.42768ms; SamplesPerSecond = 2338 + Epoch[ 1 of 4]-Minibatch[ 31- 40 of 320]: * 2560; ce = 1.74010468; err = 0.47148438; TotalTime = 1.14739s; TotalTimePerSample = 0.44820ms; SamplesPerSecond = 2231 + Epoch[ 1 of 4]-Minibatch[ 41- 50 of 320]: * 2560; ce = 1.56799011; err = 0.44335938; TotalTime = 1.09577s; TotalTimePerSample = 0.42803ms; SamplesPerSecond = 2336 + Epoch[ 1 of 4]-Minibatch[ 51- 60 of 320]: * 2560; ce = 1.46781845; err = 0.41328125; TotalTime = 1.09510s; TotalTimePerSample = 0.42777ms; SamplesPerSecond = 2337 + Epoch[ 1 of 4]-Minibatch[ 61- 70 of 320]: * 2560; ce = 1.43012238; err = 0.40585938; TotalTime = 1.09635s; TotalTimePerSample = 0.42826ms; SamplesPerSecond = 2335 + Epoch[ 1 of 4]-Minibatch[ 71- 80 of 320]: * 2560; ce = 1.35918579; err = 0.40039062; TotalTime = 1.09548s; TotalTimePerSample = 0.42792ms; SamplesPerSecond = 2336 + Epoch[ 1 of 4]-Minibatch[ 81- 90 of 320]: * 2560; ce = 1.28387451; err = 0.37812500; TotalTime = 1.09595s; TotalTimePerSample = 0.42811ms; SamplesPerSecond = 2335 + Epoch[ 1 of 4]-Minibatch[ 91- 100 of 320]: * 2560; ce = 1.29969482; err = 0.39726563; TotalTime = 1.09572s; TotalTimePerSample = 0.42802ms; SamplesPerSecond = 2336 + Epoch[ 1 of 4]-Minibatch[ 101- 110 of 320]: * 2560; ce = 1.27851257; err = 0.38945313; TotalTime = 1.09571s; TotalTimePerSample = 0.42801ms; SamplesPerSecond = 2336 + Epoch[ 1 of 4]-Minibatch[ 111- 120 of 320]: * 2560; ce = 1.27491150; err = 0.38476562; TotalTime = 1.12142s; TotalTimePerSample = 0.43806ms; SamplesPerSecond = 2282 + Epoch[ 1 of 4]-Minibatch[ 121- 130 of 320]: * 2560; ce = 1.24143066; err = 0.38164063; TotalTime = 1.09592s; TotalTimePerSample = 0.42809ms; SamplesPerSecond = 2335 + Epoch[ 1 of 4]-Minibatch[ 131- 140 of 320]: * 2560; ce = 1.31309967; err = 0.38867188; TotalTime = 1.09608s; TotalTimePerSample = 0.42816ms; SamplesPerSecond = 2335 + Epoch[ 1 of 4]-Minibatch[ 141- 150 of 320]: * 2560; ce = 1.24519043; err = 0.36054687; TotalTime = 1.09577s; TotalTimePerSample = 0.42804ms; SamplesPerSecond = 2336 + Epoch[ 1 of 4]-Minibatch[ 151- 160 of 320]: * 2560; ce = 1.26173248; err = 0.38554688; TotalTime = 1.13408s; TotalTimePerSample = 0.44300ms; SamplesPerSecond = 2257 + Epoch[ 1 of 4]-Minibatch[ 161- 170 of 320]: * 2560; ce = 1.20199585; err = 0.35390625; TotalTime = 1.11229s; TotalTimePerSample = 0.43449ms; SamplesPerSecond = 2301 + Epoch[ 1 of 4]-Minibatch[ 171- 180 of 320]: * 2560; ce = 1.21707153; err = 0.37031250; TotalTime = 1.09594s; TotalTimePerSample = 0.42810ms; SamplesPerSecond = 2335 + Epoch[ 1 of 4]-Minibatch[ 181- 190 of 320]: * 2560; ce = 1.21532898; err = 0.37382813; TotalTime = 1.09629s; TotalTimePerSample = 0.42824ms; SamplesPerSecond = 2335 + Epoch[ 1 of 4]-Minibatch[ 191- 200 of 320]: * 2560; ce = 1.20324097; err = 0.37187500; TotalTime = 1.10557s; TotalTimePerSample = 0.43186ms; SamplesPerSecond = 2315 + Epoch[ 1 of 4]-Minibatch[ 201- 210 of 320]: * 2560; ce = 1.20709839; err = 0.37343750; TotalTime = 1.09610s; TotalTimePerSample = 0.42816ms; SamplesPerSecond = 2335 + Epoch[ 1 of 4]-Minibatch[ 211- 220 of 320]: * 2560; ce = 1.14372253; err = 0.33828125; TotalTime = 1.09640s; TotalTimePerSample = 0.42828ms; SamplesPerSecond = 2334 + Epoch[ 1 of 4]-Minibatch[ 221- 230 of 320]: * 2560; ce = 1.15016785; err = 0.34492187; TotalTime = 1.09523s; TotalTimePerSample = 0.42782ms; SamplesPerSecond = 2337 + Epoch[ 1 of 4]-Minibatch[ 231- 240 of 320]: * 2560; ce = 1.19444275; err = 0.35546875; TotalTime = 1.09550s; TotalTimePerSample = 0.42793ms; 
SamplesPerSecond = 2336 + Epoch[ 1 of 4]-Minibatch[ 241- 250 of 320]: * 2560; ce = 1.16820984; err = 0.35937500; TotalTime = 1.10647s; TotalTimePerSample = 0.43222ms; SamplesPerSecond = 2313 + Epoch[ 1 of 4]-Minibatch[ 251- 260 of 320]: * 2560; ce = 1.09052429; err = 0.34062500; TotalTime = 1.11512s; TotalTimePerSample = 0.43559ms; SamplesPerSecond = 2295 + Epoch[ 1 of 4]-Minibatch[ 261- 270 of 320]: * 2560; ce = 1.11017761; err = 0.34140625; TotalTime = 1.09791s; TotalTimePerSample = 0.42887ms; SamplesPerSecond = 2331 + Epoch[ 1 of 4]-Minibatch[ 271- 280 of 320]: * 2560; ce = 1.17647095; err = 0.34960938; TotalTime = 1.09768s; TotalTimePerSample = 0.42878ms; SamplesPerSecond = 2332 + Epoch[ 1 of 4]-Minibatch[ 281- 290 of 320]: * 2560; ce = 1.12259827; err = 0.34960938; TotalTime = 1.09812s; TotalTimePerSample = 0.42895ms; SamplesPerSecond = 2331 + Epoch[ 1 of 4]-Minibatch[ 291- 300 of 320]: * 2560; ce = 1.14627686; err = 0.34882812; TotalTime = 1.09796s; TotalTimePerSample = 0.42889ms; SamplesPerSecond = 2331 + Epoch[ 1 of 4]-Minibatch[ 301- 310 of 320]: * 2560; ce = 1.12782593; err = 0.34296875; TotalTime = 1.11678s; TotalTimePerSample = 0.43624ms; SamplesPerSecond = 2292 + Epoch[ 1 of 4]-Minibatch[ 311- 320 of 320]: * 2560; ce = 1.12669983; err = 0.34453125; TotalTime = 1.08905s; TotalTimePerSample = 0.42541ms; SamplesPerSecond = 2350 +Finished Epoch[ 1 of 4]: [Training] ce = 1.4077845; err = 0.40218505; learningRatePerSample = 0.003125000047; EpochTime=39.654781 Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.810210 minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses Starting minibatch loop. - Epoch[ 2 of 4]-Minibatch[ 1- 10 of 160]: * 5120; CrossEntropyWithSoftmax = 1.32465096; EvalErrorPrediction = 0.39160156; TotalTime = 1.80429s; TotalTimePerSample = 0.35240ms; SamplesPerSecond = 2837 - Epoch[ 2 of 4]-Minibatch[ 11- 20 of 160]: * 5120; CrossEntropyWithSoftmax = 1.26754742; EvalErrorPrediction = 0.38085938; TotalTime = 1.81056s; TotalTimePerSample = 0.35363ms; SamplesPerSecond = 2827 - Epoch[ 2 of 4]-Minibatch[ 21- 30 of 160]: * 5120; CrossEntropyWithSoftmax = 1.17425861; EvalErrorPrediction = 0.35332031; TotalTime = 1.79388s; TotalTimePerSample = 0.35037ms; SamplesPerSecond = 2854 - Epoch[ 2 of 4]-Minibatch[ 31- 40 of 160]: * 5120; CrossEntropyWithSoftmax = 1.12545509; EvalErrorPrediction = 0.34492187; TotalTime = 1.79352s; TotalTimePerSample = 0.35030ms; SamplesPerSecond = 2854 - Epoch[ 2 of 4]-Minibatch[ 41- 50 of 160]: * 5120; CrossEntropyWithSoftmax = 1.13674049; EvalErrorPrediction = 0.34238281; TotalTime = 1.79283s; TotalTimePerSample = 0.35016ms; SamplesPerSecond = 2855 - Epoch[ 2 of 4]-Minibatch[ 51- 60 of 160]: * 5120; CrossEntropyWithSoftmax = 1.13298378; EvalErrorPrediction = 0.34746094; TotalTime = 1.79214s; TotalTimePerSample = 0.35003ms; SamplesPerSecond = 2856 - Epoch[ 2 of 4]-Minibatch[ 61- 70 of 160]: * 5120; CrossEntropyWithSoftmax = 1.07790451; EvalErrorPrediction = 0.33378906; TotalTime = 1.79264s; TotalTimePerSample = 0.35013ms; SamplesPerSecond = 2856 - Epoch[ 2 of 4]-Minibatch[ 71- 80 of 160]: * 5120; CrossEntropyWithSoftmax = 1.04510498; EvalErrorPrediction = 0.31738281; TotalTime = 1.84448s; TotalTimePerSample = 0.36025ms; SamplesPerSecond = 2775 - Epoch[ 2 of 4]-Minibatch[ 81- 90 of 160]: * 5120; CrossEntropyWithSoftmax = 1.15464554; EvalErrorPrediction = 0.35839844; TotalTime = 1.79575s; TotalTimePerSample = 0.35073ms; SamplesPerSecond = 2851 - Epoch[ 2 of 
4]-Minibatch[ 91- 100 of 160]: * 5120; CrossEntropyWithSoftmax = 1.25793457; EvalErrorPrediction = 0.37714844; TotalTime = 1.79629s; TotalTimePerSample = 0.35084ms; SamplesPerSecond = 2850 - Epoch[ 2 of 4]-Minibatch[ 101- 110 of 160]: * 5120; CrossEntropyWithSoftmax = 1.20009308; EvalErrorPrediction = 0.37695312; TotalTime = 1.79532s; TotalTimePerSample = 0.35065ms; SamplesPerSecond = 2851 - Epoch[ 2 of 4]-Minibatch[ 111- 120 of 160]: * 5120; CrossEntropyWithSoftmax = 1.11907349; EvalErrorPrediction = 0.34453125; TotalTime = 1.79536s; TotalTimePerSample = 0.35066ms; SamplesPerSecond = 2851 - Epoch[ 2 of 4]-Minibatch[ 121- 130 of 160]: * 5120; CrossEntropyWithSoftmax = 1.10299835; EvalErrorPrediction = 0.34179688; TotalTime = 1.79584s; TotalTimePerSample = 0.35075ms; SamplesPerSecond = 2851 - Epoch[ 2 of 4]-Minibatch[ 131- 140 of 160]: * 5120; CrossEntropyWithSoftmax = 1.10662994; EvalErrorPrediction = 0.33437500; TotalTime = 1.79551s; TotalTimePerSample = 0.35069ms; SamplesPerSecond = 2851 - Epoch[ 2 of 4]-Minibatch[ 141- 150 of 160]: * 5120; CrossEntropyWithSoftmax = 1.05814362; EvalErrorPrediction = 0.33378906; TotalTime = 1.79585s; TotalTimePerSample = 0.35075ms; SamplesPerSecond = 2851 - Epoch[ 2 of 4]-Minibatch[ 151- 160 of 160]: * 5120; CrossEntropyWithSoftmax = 1.05615540; EvalErrorPrediction = 0.32558594; TotalTime = 1.78428s; TotalTimePerSample = 0.34849ms; SamplesPerSecond = 2869 -Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 1.14627; EvalErrorPrediction = 0.35026857; learningRatePerSample = 0.003125000047; EpochTime=28.798577 + Epoch[ 2 of 4]-Minibatch[ 1- 10 of 160]: * 5120; ce = 1.32465096; err = 0.39160156; TotalTime = 1.80429s; TotalTimePerSample = 0.35240ms; SamplesPerSecond = 2837 + Epoch[ 2 of 4]-Minibatch[ 11- 20 of 160]: * 5120; ce = 1.26754742; err = 0.38085938; TotalTime = 1.81056s; TotalTimePerSample = 0.35363ms; SamplesPerSecond = 2827 + Epoch[ 2 of 4]-Minibatch[ 21- 30 of 160]: * 5120; ce = 1.17425861; err = 0.35332031; TotalTime = 1.79388s; TotalTimePerSample = 0.35037ms; SamplesPerSecond = 2854 + Epoch[ 2 of 4]-Minibatch[ 31- 40 of 160]: * 5120; ce = 1.12545509; err = 0.34492187; TotalTime = 1.79352s; TotalTimePerSample = 0.35030ms; SamplesPerSecond = 2854 + Epoch[ 2 of 4]-Minibatch[ 41- 50 of 160]: * 5120; ce = 1.13674049; err = 0.34238281; TotalTime = 1.79283s; TotalTimePerSample = 0.35016ms; SamplesPerSecond = 2855 + Epoch[ 2 of 4]-Minibatch[ 51- 60 of 160]: * 5120; ce = 1.13298378; err = 0.34746094; TotalTime = 1.79214s; TotalTimePerSample = 0.35003ms; SamplesPerSecond = 2856 + Epoch[ 2 of 4]-Minibatch[ 61- 70 of 160]: * 5120; ce = 1.07790451; err = 0.33378906; TotalTime = 1.79264s; TotalTimePerSample = 0.35013ms; SamplesPerSecond = 2856 + Epoch[ 2 of 4]-Minibatch[ 71- 80 of 160]: * 5120; ce = 1.04510498; err = 0.31738281; TotalTime = 1.84448s; TotalTimePerSample = 0.36025ms; SamplesPerSecond = 2775 + Epoch[ 2 of 4]-Minibatch[ 81- 90 of 160]: * 5120; ce = 1.15464554; err = 0.35839844; TotalTime = 1.79575s; TotalTimePerSample = 0.35073ms; SamplesPerSecond = 2851 + Epoch[ 2 of 4]-Minibatch[ 91- 100 of 160]: * 5120; ce = 1.25793457; err = 0.37714844; TotalTime = 1.79629s; TotalTimePerSample = 0.35084ms; SamplesPerSecond = 2850 + Epoch[ 2 of 4]-Minibatch[ 101- 110 of 160]: * 5120; ce = 1.20009308; err = 0.37695312; TotalTime = 1.79532s; TotalTimePerSample = 0.35065ms; SamplesPerSecond = 2851 + Epoch[ 2 of 4]-Minibatch[ 111- 120 of 160]: * 5120; ce = 1.11907349; err = 0.34453125; TotalTime = 1.79536s; TotalTimePerSample = 0.35066ms; 
SamplesPerSecond = 2851 + Epoch[ 2 of 4]-Minibatch[ 121- 130 of 160]: * 5120; ce = 1.10299835; err = 0.34179688; TotalTime = 1.79584s; TotalTimePerSample = 0.35075ms; SamplesPerSecond = 2851 + Epoch[ 2 of 4]-Minibatch[ 131- 140 of 160]: * 5120; ce = 1.10662994; err = 0.33437500; TotalTime = 1.79551s; TotalTimePerSample = 0.35069ms; SamplesPerSecond = 2851 + Epoch[ 2 of 4]-Minibatch[ 141- 150 of 160]: * 5120; ce = 1.05814362; err = 0.33378906; TotalTime = 1.79585s; TotalTimePerSample = 0.35075ms; SamplesPerSecond = 2851 + Epoch[ 2 of 4]-Minibatch[ 151- 160 of 160]: * 5120; ce = 1.05615540; err = 0.32558594; TotalTime = 1.78428s; TotalTimePerSample = 0.34849ms; SamplesPerSecond = 2869 +Finished Epoch[ 2 of 4]: [Training] ce = 1.14627; err = 0.35026857; learningRatePerSample = 0.003125000047; EpochTime=28.798577 Starting Epoch 3: learning rate per sample = 0.003125 effective momentum = 0.810210 minibatchiterator: epoch 2: frames [163840..245760] (first utterance at frame 163840), data subset 0 of 1, with 1 datapasses Starting minibatch loop. - Epoch[ 3 of 4]-Minibatch[ 1- 10 of 160]: * 5120; CrossEntropyWithSoftmax = 1.11535473; EvalErrorPrediction = 0.34570312; TotalTime = 1.79470s; TotalTimePerSample = 0.35053ms; SamplesPerSecond = 2852 - Epoch[ 3 of 4]-Minibatch[ 11- 20 of 160]: * 5120; CrossEntropyWithSoftmax = 1.10306950; EvalErrorPrediction = 0.33984375; TotalTime = 1.79571s; TotalTimePerSample = 0.35072ms; SamplesPerSecond = 2851 - Epoch[ 3 of 4]-Minibatch[ 21- 30 of 160]: * 5120; CrossEntropyWithSoftmax = 1.08655663; EvalErrorPrediction = 0.33750000; TotalTime = 1.85428s; TotalTimePerSample = 0.36216ms; SamplesPerSecond = 2761 - Epoch[ 3 of 4]-Minibatch[ 31- 40 of 160]: * 5120; CrossEntropyWithSoftmax = 1.08894272; EvalErrorPrediction = 0.32910156; TotalTime = 1.79696s; TotalTimePerSample = 0.35097ms; SamplesPerSecond = 2849 - Epoch[ 3 of 4]-Minibatch[ 41- 50 of 160]: * 5120; CrossEntropyWithSoftmax = 1.12028427; EvalErrorPrediction = 0.34257813; TotalTime = 1.79705s; TotalTimePerSample = 0.35099ms; SamplesPerSecond = 2849 - Epoch[ 3 of 4]-Minibatch[ 51- 60 of 160]: * 5120; CrossEntropyWithSoftmax = 1.08059044; EvalErrorPrediction = 0.33847656; TotalTime = 1.79691s; TotalTimePerSample = 0.35096ms; SamplesPerSecond = 2849 - Epoch[ 3 of 4]-Minibatch[ 61- 70 of 160]: * 5120; CrossEntropyWithSoftmax = 1.09700623; EvalErrorPrediction = 0.34140625; TotalTime = 1.79658s; TotalTimePerSample = 0.35090ms; SamplesPerSecond = 2849 - Epoch[ 3 of 4]-Minibatch[ 71- 80 of 160]: * 5120; CrossEntropyWithSoftmax = 1.08042450; EvalErrorPrediction = 0.32988281; TotalTime = 1.79673s; TotalTimePerSample = 0.35092ms; SamplesPerSecond = 2849 - Epoch[ 3 of 4]-Minibatch[ 81- 90 of 160]: * 5120; CrossEntropyWithSoftmax = 1.03139420; EvalErrorPrediction = 0.31875000; TotalTime = 1.79605s; TotalTimePerSample = 0.35079ms; SamplesPerSecond = 2850 - Epoch[ 3 of 4]-Minibatch[ 91- 100 of 160]: * 5120; CrossEntropyWithSoftmax = 1.04829330; EvalErrorPrediction = 0.31484375; TotalTime = 1.79666s; TotalTimePerSample = 0.35091ms; SamplesPerSecond = 2849 - Epoch[ 3 of 4]-Minibatch[ 101- 110 of 160]: * 5120; CrossEntropyWithSoftmax = 1.05102234; EvalErrorPrediction = 0.32949219; TotalTime = 1.79625s; TotalTimePerSample = 0.35083ms; SamplesPerSecond = 2850 - Epoch[ 3 of 4]-Minibatch[ 111- 120 of 160]: * 5120; CrossEntropyWithSoftmax = 1.08982849; EvalErrorPrediction = 0.34023437; TotalTime = 1.79698s; TotalTimePerSample = 0.35097ms; SamplesPerSecond = 2849 - Epoch[ 3 of 4]-Minibatch[ 121- 130 of 160]: * 5120; 
CrossEntropyWithSoftmax = 1.05953979; EvalErrorPrediction = 0.31953125; TotalTime = 1.81837s; TotalTimePerSample = 0.35515ms; SamplesPerSecond = 2815 - Epoch[ 3 of 4]-Minibatch[ 131- 140 of 160]: * 5120; CrossEntropyWithSoftmax = 1.02886810; EvalErrorPrediction = 0.32363281; TotalTime = 1.80412s; TotalTimePerSample = 0.35237ms; SamplesPerSecond = 2837 - Epoch[ 3 of 4]-Minibatch[ 141- 150 of 160]: * 5120; CrossEntropyWithSoftmax = 1.03884735; EvalErrorPrediction = 0.32656250; TotalTime = 1.79584s; TotalTimePerSample = 0.35075ms; SamplesPerSecond = 2851 - Epoch[ 3 of 4]-Minibatch[ 151- 160 of 160]: * 5120; CrossEntropyWithSoftmax = 1.02478790; EvalErrorPrediction = 0.31718750; TotalTime = 1.78394s; TotalTimePerSample = 0.34843ms; SamplesPerSecond = 2870 -Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.0715507; EvalErrorPrediction = 0.33092043; learningRatePerSample = 0.003125000047; EpochTime=28.836488 + Epoch[ 3 of 4]-Minibatch[ 1- 10 of 160]: * 5120; ce = 1.11535473; err = 0.34570312; TotalTime = 1.79470s; TotalTimePerSample = 0.35053ms; SamplesPerSecond = 2852 + Epoch[ 3 of 4]-Minibatch[ 11- 20 of 160]: * 5120; ce = 1.10306950; err = 0.33984375; TotalTime = 1.79571s; TotalTimePerSample = 0.35072ms; SamplesPerSecond = 2851 + Epoch[ 3 of 4]-Minibatch[ 21- 30 of 160]: * 5120; ce = 1.08655663; err = 0.33750000; TotalTime = 1.85428s; TotalTimePerSample = 0.36216ms; SamplesPerSecond = 2761 + Epoch[ 3 of 4]-Minibatch[ 31- 40 of 160]: * 5120; ce = 1.08894272; err = 0.32910156; TotalTime = 1.79696s; TotalTimePerSample = 0.35097ms; SamplesPerSecond = 2849 + Epoch[ 3 of 4]-Minibatch[ 41- 50 of 160]: * 5120; ce = 1.12028427; err = 0.34257813; TotalTime = 1.79705s; TotalTimePerSample = 0.35099ms; SamplesPerSecond = 2849 + Epoch[ 3 of 4]-Minibatch[ 51- 60 of 160]: * 5120; ce = 1.08059044; err = 0.33847656; TotalTime = 1.79691s; TotalTimePerSample = 0.35096ms; SamplesPerSecond = 2849 + Epoch[ 3 of 4]-Minibatch[ 61- 70 of 160]: * 5120; ce = 1.09700623; err = 0.34140625; TotalTime = 1.79658s; TotalTimePerSample = 0.35090ms; SamplesPerSecond = 2849 + Epoch[ 3 of 4]-Minibatch[ 71- 80 of 160]: * 5120; ce = 1.08042450; err = 0.32988281; TotalTime = 1.79673s; TotalTimePerSample = 0.35092ms; SamplesPerSecond = 2849 + Epoch[ 3 of 4]-Minibatch[ 81- 90 of 160]: * 5120; ce = 1.03139420; err = 0.31875000; TotalTime = 1.79605s; TotalTimePerSample = 0.35079ms; SamplesPerSecond = 2850 + Epoch[ 3 of 4]-Minibatch[ 91- 100 of 160]: * 5120; ce = 1.04829330; err = 0.31484375; TotalTime = 1.79666s; TotalTimePerSample = 0.35091ms; SamplesPerSecond = 2849 + Epoch[ 3 of 4]-Minibatch[ 101- 110 of 160]: * 5120; ce = 1.05102234; err = 0.32949219; TotalTime = 1.79625s; TotalTimePerSample = 0.35083ms; SamplesPerSecond = 2850 + Epoch[ 3 of 4]-Minibatch[ 111- 120 of 160]: * 5120; ce = 1.08982849; err = 0.34023437; TotalTime = 1.79698s; TotalTimePerSample = 0.35097ms; SamplesPerSecond = 2849 + Epoch[ 3 of 4]-Minibatch[ 121- 130 of 160]: * 5120; ce = 1.05953979; err = 0.31953125; TotalTime = 1.81837s; TotalTimePerSample = 0.35515ms; SamplesPerSecond = 2815 + Epoch[ 3 of 4]-Minibatch[ 131- 140 of 160]: * 5120; ce = 1.02886810; err = 0.32363281; TotalTime = 1.80412s; TotalTimePerSample = 0.35237ms; SamplesPerSecond = 2837 + Epoch[ 3 of 4]-Minibatch[ 141- 150 of 160]: * 5120; ce = 1.03884735; err = 0.32656250; TotalTime = 1.79584s; TotalTimePerSample = 0.35075ms; SamplesPerSecond = 2851 + Epoch[ 3 of 4]-Minibatch[ 151- 160 of 160]: * 5120; ce = 1.02478790; err = 0.31718750; TotalTime = 1.78394s; TotalTimePerSample = 
0.34843ms; SamplesPerSecond = 2870 +Finished Epoch[ 3 of 4]: [Training] ce = 1.0715507; err = 0.33092043; learningRatePerSample = 0.003125000047; EpochTime=28.836488 Starting Epoch 4: learning rate per sample = 0.003125 effective momentum = 0.810210 minibatchiterator: epoch 3: frames [245760..327680] (first utterance at frame 245760), data subset 0 of 1, with 1 datapasses Starting minibatch loop. - Epoch[ 4 of 4]-Minibatch[ 1- 10 of 160]: * 5120; CrossEntropyWithSoftmax = 1.02606211; EvalErrorPrediction = 0.31562500; TotalTime = 1.79489s; TotalTimePerSample = 0.35056ms; SamplesPerSecond = 2852 - Epoch[ 4 of 4]-Minibatch[ 11- 20 of 160]: * 4926; CrossEntropyWithSoftmax = 1.03991783; EvalErrorPrediction = 0.32216809; TotalTime = 1.95701s; TotalTimePerSample = 0.39728ms; SamplesPerSecond = 2517 - Epoch[ 4 of 4]-Minibatch[ 21- 30 of 160]: * 5120; CrossEntropyWithSoftmax = 1.01588020; EvalErrorPrediction = 0.31601563; TotalTime = 1.79596s; TotalTimePerSample = 0.35077ms; SamplesPerSecond = 2850 - Epoch[ 4 of 4]-Minibatch[ 31- 40 of 160]: * 5120; CrossEntropyWithSoftmax = 0.99285030; EvalErrorPrediction = 0.30957031; TotalTime = 1.79623s; TotalTimePerSample = 0.35083ms; SamplesPerSecond = 2850 - Epoch[ 4 of 4]-Minibatch[ 41- 50 of 160]: * 5120; CrossEntropyWithSoftmax = 0.99535828; EvalErrorPrediction = 0.31640625; TotalTime = 1.79578s; TotalTimePerSample = 0.35074ms; SamplesPerSecond = 2851 - Epoch[ 4 of 4]-Minibatch[ 51- 60 of 160]: * 5120; CrossEntropyWithSoftmax = 1.02987175; EvalErrorPrediction = 0.32539062; TotalTime = 1.79591s; TotalTimePerSample = 0.35076ms; SamplesPerSecond = 2850 - Epoch[ 4 of 4]-Minibatch[ 61- 70 of 160]: * 5120; CrossEntropyWithSoftmax = 1.03030205; EvalErrorPrediction = 0.31582031; TotalTime = 1.79514s; TotalTimePerSample = 0.35061ms; SamplesPerSecond = 2852 - Epoch[ 4 of 4]-Minibatch[ 71- 80 of 160]: * 5120; CrossEntropyWithSoftmax = 0.98913116; EvalErrorPrediction = 0.31210938; TotalTime = 1.79670s; TotalTimePerSample = 0.35092ms; SamplesPerSecond = 2849 - Epoch[ 4 of 4]-Minibatch[ 81- 90 of 160]: * 5120; CrossEntropyWithSoftmax = 0.98892746; EvalErrorPrediction = 0.30546875; TotalTime = 1.79605s; TotalTimePerSample = 0.35079ms; SamplesPerSecond = 2850 - Epoch[ 4 of 4]-Minibatch[ 91- 100 of 160]: * 5120; CrossEntropyWithSoftmax = 1.00130386; EvalErrorPrediction = 0.30917969; TotalTime = 1.80031s; TotalTimePerSample = 0.35162ms; SamplesPerSecond = 2843 - Epoch[ 4 of 4]-Minibatch[ 101- 110 of 160]: * 5120; CrossEntropyWithSoftmax = 1.02166672; EvalErrorPrediction = 0.31054688; TotalTime = 1.79190s; TotalTimePerSample = 0.34998ms; SamplesPerSecond = 2857 - Epoch[ 4 of 4]-Minibatch[ 111- 120 of 160]: * 5120; CrossEntropyWithSoftmax = 1.02634430; EvalErrorPrediction = 0.32597656; TotalTime = 1.79346s; TotalTimePerSample = 0.35029ms; SamplesPerSecond = 2854 - Epoch[ 4 of 4]-Minibatch[ 121- 130 of 160]: * 5120; CrossEntropyWithSoftmax = 0.97695312; EvalErrorPrediction = 0.29863281; TotalTime = 1.79173s; TotalTimePerSample = 0.34995ms; SamplesPerSecond = 2857 - Epoch[ 4 of 4]-Minibatch[ 131- 140 of 160]: * 5120; CrossEntropyWithSoftmax = 0.97142334; EvalErrorPrediction = 0.30546875; TotalTime = 1.79446s; TotalTimePerSample = 0.35048ms; SamplesPerSecond = 2853 - Epoch[ 4 of 4]-Minibatch[ 141- 150 of 160]: * 5120; CrossEntropyWithSoftmax = 0.98984375; EvalErrorPrediction = 0.31269531; TotalTime = 1.79283s; TotalTimePerSample = 0.35016ms; SamplesPerSecond = 2855 - Epoch[ 4 of 4]-Minibatch[ 151- 160 of 160]: * 5120; CrossEntropyWithSoftmax = 0.97683563; EvalErrorPrediction = 
0.30175781; TotalTime = 1.78603s; TotalTimePerSample = 0.34883ms; SamplesPerSecond = 2866 -Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.0043954; EvalErrorPrediction = 0.31276855; learningRatePerSample = 0.003125000047; EpochTime=28.984064 + Epoch[ 4 of 4]-Minibatch[ 1- 10 of 160]: * 5120; ce = 1.02606211; err = 0.31562500; TotalTime = 1.79489s; TotalTimePerSample = 0.35056ms; SamplesPerSecond = 2852 + Epoch[ 4 of 4]-Minibatch[ 11- 20 of 160]: * 4926; ce = 1.03991783; err = 0.32216809; TotalTime = 1.95701s; TotalTimePerSample = 0.39728ms; SamplesPerSecond = 2517 + Epoch[ 4 of 4]-Minibatch[ 21- 30 of 160]: * 5120; ce = 1.01588020; err = 0.31601563; TotalTime = 1.79596s; TotalTimePerSample = 0.35077ms; SamplesPerSecond = 2850 + Epoch[ 4 of 4]-Minibatch[ 31- 40 of 160]: * 5120; ce = 0.99285030; err = 0.30957031; TotalTime = 1.79623s; TotalTimePerSample = 0.35083ms; SamplesPerSecond = 2850 + Epoch[ 4 of 4]-Minibatch[ 41- 50 of 160]: * 5120; ce = 0.99535828; err = 0.31640625; TotalTime = 1.79578s; TotalTimePerSample = 0.35074ms; SamplesPerSecond = 2851 + Epoch[ 4 of 4]-Minibatch[ 51- 60 of 160]: * 5120; ce = 1.02987175; err = 0.32539062; TotalTime = 1.79591s; TotalTimePerSample = 0.35076ms; SamplesPerSecond = 2850 + Epoch[ 4 of 4]-Minibatch[ 61- 70 of 160]: * 5120; ce = 1.03030205; err = 0.31582031; TotalTime = 1.79514s; TotalTimePerSample = 0.35061ms; SamplesPerSecond = 2852 + Epoch[ 4 of 4]-Minibatch[ 71- 80 of 160]: * 5120; ce = 0.98913116; err = 0.31210938; TotalTime = 1.79670s; TotalTimePerSample = 0.35092ms; SamplesPerSecond = 2849 + Epoch[ 4 of 4]-Minibatch[ 81- 90 of 160]: * 5120; ce = 0.98892746; err = 0.30546875; TotalTime = 1.79605s; TotalTimePerSample = 0.35079ms; SamplesPerSecond = 2850 + Epoch[ 4 of 4]-Minibatch[ 91- 100 of 160]: * 5120; ce = 1.00130386; err = 0.30917969; TotalTime = 1.80031s; TotalTimePerSample = 0.35162ms; SamplesPerSecond = 2843 + Epoch[ 4 of 4]-Minibatch[ 101- 110 of 160]: * 5120; ce = 1.02166672; err = 0.31054688; TotalTime = 1.79190s; TotalTimePerSample = 0.34998ms; SamplesPerSecond = 2857 + Epoch[ 4 of 4]-Minibatch[ 111- 120 of 160]: * 5120; ce = 1.02634430; err = 0.32597656; TotalTime = 1.79346s; TotalTimePerSample = 0.35029ms; SamplesPerSecond = 2854 + Epoch[ 4 of 4]-Minibatch[ 121- 130 of 160]: * 5120; ce = 0.97695312; err = 0.29863281; TotalTime = 1.79173s; TotalTimePerSample = 0.34995ms; SamplesPerSecond = 2857 + Epoch[ 4 of 4]-Minibatch[ 131- 140 of 160]: * 5120; ce = 0.97142334; err = 0.30546875; TotalTime = 1.79446s; TotalTimePerSample = 0.35048ms; SamplesPerSecond = 2853 + Epoch[ 4 of 4]-Minibatch[ 141- 150 of 160]: * 5120; ce = 0.98984375; err = 0.31269531; TotalTime = 1.79283s; TotalTimePerSample = 0.35016ms; SamplesPerSecond = 2855 + Epoch[ 4 of 4]-Minibatch[ 151- 160 of 160]: * 5120; ce = 0.97683563; err = 0.30175781; TotalTime = 1.78603s; TotalTimePerSample = 0.34883ms; SamplesPerSecond = 2866 +Finished Epoch[ 4 of 4]: [Training] ce = 1.0043954; err = 0.31276855; learningRatePerSample = 0.003125000047; EpochTime=28.984064 CNTKCommandTrainEnd: speechTrain __COMPLETED__ diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.gpu.txt index fe1fef9f8..09dde067e 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.gpu.txt @@ -1,4 
+1,4 @@ -=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/cntk_dpt.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/cntk_dpt.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true ------------------------------------------------------------------- Build info: @@ -17,32 +17,32 @@ Build info: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux ------------------------------------------------------------------- Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -05/03/2016 18:17:08: ------------------------------------------------------------------- -05/03/2016 18:17:08: Build info: +05/03/2016 18:15:19: ------------------------------------------------------------------- +05/03/2016 18:15:19: Build info: -05/03/2016 18:17:08: Built time: May 3 2016 17:56:15 -05/03/2016 18:17:08: Last modified date: Tue May 3 11:36:22 2016 -05/03/2016 18:17:08: Build type: release -05/03/2016 18:17:08: Build target: GPU -05/03/2016 18:17:08: With 1bit-SGD: no -05/03/2016 18:17:08: Math lib: acml -05/03/2016 18:17:08: CUDA_PATH: /usr/local/cuda-7.5 -05/03/2016 18:17:08: CUB_PATH: /usr/local/cub-1.4.1 -05/03/2016 18:17:08: CUDNN_PATH: /usr/local/cudnn-4.0 -05/03/2016 18:17:08: Build Branch: HEAD -05/03/2016 18:17:08: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -05/03/2016 18:17:08: Built by philly on 18750d26eb32 -05/03/2016 18:17:08: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -05/03/2016 18:17:08: ------------------------------------------------------------------- +05/03/2016 18:15:19: Built time: May 3 2016 17:56:15 +05/03/2016 18:15:19: Last modified date: Tue May 3 11:36:22 2016 +05/03/2016 18:15:19: Build type: release +05/03/2016 18:15:19: Build target: GPU +05/03/2016 18:15:19: With 1bit-SGD: no +05/03/2016 18:15:19: Math lib: acml +05/03/2016 18:15:19: CUDA_PATH: /usr/local/cuda-7.5 +05/03/2016 18:15:19: CUB_PATH: /usr/local/cub-1.4.1 +05/03/2016 18:15:19: CUDNN_PATH: 
/usr/local/cudnn-4.0 +05/03/2016 18:15:19: Build Branch: HEAD +05/03/2016 18:15:19: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +05/03/2016 18:15:19: Built by philly on 18750d26eb32 +05/03/2016 18:15:19: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +05/03/2016 18:15:19: ------------------------------------------------------------------- -05/03/2016 18:17:08: Running on localhost at 2016/05/03 18:17:08 -05/03/2016 18:17:08: Command line: -/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/cntk_dpt.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +05/03/2016 18:15:19: Running on localhost at 2016/05/03 18:15:19 +05/03/2016 18:15:19: Command line: +/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/cntk_dpt.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true -05/03/2016 18:17:08: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:17:08: precision = "float" +05/03/2016 18:15:19: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:15:19: precision = "float" deviceId = $DeviceId$ command = dptPre1:addLayer2:dptPre2:addLayer3:speechTrain ndlMacros = "$ConfigDir$/macros.txt" @@ -129,22 +129,20 @@ reader = [ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining 
+OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 18:17:08: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:15:19: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:17:08: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:17:08: precision = "float" +05/03/2016 18:15:19: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:15:19: precision = "float" deviceId = 0 command = dptPre1:addLayer2:dptPre2:addLayer3:speechTrain -ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/macros.txt" +ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/macros.txt" globalMeanPath = "GlobalStats/mean.363" globalInvStdPath = "GlobalStats/var.363" globalPriorPath = "GlobalStats/prior.132" @@ -160,41 +158,41 @@ SGD = [ ] dptPre1 = [ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" NDLNetworkBuilder = [ - networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/dnn_1layer.txt" + networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/dnn_1layer.txt" ] ] addLayer2 = [ action = "edit" currLayer = 1 newLayer = 2 - currModel = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" - newModel = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0" - editPath = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/add_layer.mel" + currModel = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" + newModel = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0" + editPath = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/add_layer.mel" ] dptPre2 = [ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" NDLNetworkBuilder = [ - networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/dnn_1layer.txt" + networkDescription = 
"/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/dnn_1layer.txt" ] ] addLayer3 = [ action = "edit" currLayer = 2 newLayer = 3 - currModel = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" - newModel = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0" - editPath = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/add_layer.mel" + currModel = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" + newModel = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0" + editPath = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/add_layer.mel" ] speechTrain = [ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech" deviceId = 0 traceLevel = 1 NDLNetworkBuilder = [ - networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/dnn.txt" + networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/dnn.txt" ] SGD = [ epochSize = 81920 @@ -228,62 +226,60 @@ reader = [ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 18:17:08: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:15:19: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:17:08: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:15:19: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk_dpt.cntk:addLayer2=[ action = "edit" currLayer = 1 newLayer = 2 - currModel = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" - newModel = 
"/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0" - editPath = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/add_layer.mel" + currModel = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" + newModel = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0" + editPath = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/add_layer.mel" ] configparameters: cntk_dpt.cntk:addLayer3=[ action = "edit" currLayer = 2 newLayer = 3 - currModel = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" - newModel = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0" - editPath = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/add_layer.mel" + currModel = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" + newModel = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0" + editPath = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/add_layer.mel" ] configparameters: cntk_dpt.cntk:command=dptPre1:addLayer2:dptPre2:addLayer3:speechTrain -configparameters: cntk_dpt.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining +configparameters: cntk_dpt.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining configparameters: cntk_dpt.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk_dpt.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk_dpt.cntk:deviceId=0 configparameters: cntk_dpt.cntk:dptPre1=[ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" NDLNetworkBuilder = [ - networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/dnn_1layer.txt" + networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/dnn_1layer.txt" ] ] configparameters: cntk_dpt.cntk:dptPre2=[ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" NDLNetworkBuilder = [ - networkDescription = 
"/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/dnn_1layer.txt" + networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/dnn_1layer.txt" ] ] configparameters: cntk_dpt.cntk:globalInvStdPath=GlobalStats/var.363 configparameters: cntk_dpt.cntk:globalMeanPath=GlobalStats/mean.363 configparameters: cntk_dpt.cntk:globalPriorPath=GlobalStats/prior.132 -configparameters: cntk_dpt.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/macros.txt -configparameters: cntk_dpt.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu +configparameters: cntk_dpt.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/macros.txt +configparameters: cntk_dpt.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu configparameters: cntk_dpt.cntk:precision=float configparameters: cntk_dpt.cntk:reader=[ readerType = "HTKMLFReader" @@ -302,9 +298,9 @@ configparameters: cntk_dpt.cntk:reader=[ labelDim = 132 labelType = "category" ] -] [readerType=ExperimentalHTKMLFReader] [prefetch=true] +] -configparameters: cntk_dpt.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu +configparameters: cntk_dpt.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu configparameters: cntk_dpt.cntk:SGD=[ epochSize = 81920 minibatchSize = 256 @@ -317,11 +313,11 @@ configparameters: cntk_dpt.cntk:SGD=[ configparameters: cntk_dpt.cntk:speechTrain=[ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech" deviceId = 0 traceLevel = 1 NDLNetworkBuilder = [ - networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/../../../DNN/DiscriminativePreTraining/dnn.txt" + networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/DiscriminativePreTraining/dnn.txt" ] SGD = [ epochSize = 81920 @@ -339,34 +335,33 @@ configparameters: cntk_dpt.cntk:speechTrain=[ configparameters: cntk_dpt.cntk:timestamping=true configparameters: cntk_dpt.cntk:traceLevel=1 -05/03/2016 18:17:08: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:17:08: Commands: dptPre1 addLayer2 dptPre2 addLayer3 speechTrain -05/03/2016 18:17:08: Precision = "float" -05/03/2016 18:17:08: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech -05/03/2016 18:17:08: CNTKCommandTrainInfo: dptPre1 : 2 -05/03/2016 18:17:08: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech -05/03/2016 18:17:08: CNTKCommandTrainInfo: dptPre2 : 2 -05/03/2016 18:17:08: CNTKModelPath: 
/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech -05/03/2016 18:17:08: CNTKCommandTrainInfo: speechTrain : 4 -05/03/2016 18:17:08: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 8 +05/03/2016 18:15:19: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:15:19: Commands: dptPre1 addLayer2 dptPre2 addLayer3 speechTrain +05/03/2016 18:15:19: Precision = "float" +05/03/2016 18:15:19: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech +05/03/2016 18:15:19: CNTKCommandTrainInfo: dptPre1 : 2 +05/03/2016 18:15:19: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech +05/03/2016 18:15:19: CNTKCommandTrainInfo: dptPre2 : 2 +05/03/2016 18:15:19: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech +05/03/2016 18:15:19: CNTKCommandTrainInfo: speechTrain : 4 +05/03/2016 18:15:19: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 8 -05/03/2016 18:17:08: ############################################################################## -05/03/2016 18:17:08: # # -05/03/2016 18:17:08: # Action "train" # -05/03/2016 18:17:08: # # -05/03/2016 18:17:08: ############################################################################## +05/03/2016 18:15:19: ############################################################################## +05/03/2016 18:15:19: # # +05/03/2016 18:15:19: # Action "train" # +05/03/2016 18:15:19: # # +05/03/2016 18:15:19: ############################################################################## -05/03/2016 18:17:08: CNTKCommandTrainBegin: dptPre1 +05/03/2016 18:15:19: CNTKCommandTrainBegin: dptPre1 NDLBuilder Using GPU 0 -Reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.scp ... 948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:17:08: Creating virgin network. +05/03/2016 18:15:19: Creating virgin network. SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 Post-processing network... @@ -409,14 +404,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:17:08: Created model with 19 nodes on GPU 0. +05/03/2016 18:15:19: Created model with 19 nodes on GPU 0. 
-05/03/2016 18:17:08: Training criterion node(s): -05/03/2016 18:17:08: ce = CrossEntropyWithSoftmax +05/03/2016 18:15:19: Training criterion node(s): +05/03/2016 18:15:19: ce = CrossEntropyWithSoftmax -05/03/2016 18:17:08: Evaluation criterion node(s): +05/03/2016 18:15:19: Evaluation criterion node(s): -05/03/2016 18:17:08: err = ErrorPrediction +05/03/2016 18:15:19: err = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -424,117 +419,120 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: (nil): {[err Gradient[1]] [featNorm Gradient[363 x *]] [features Gradient[363 x *]] [globalInvStd Gradient[363 x 1]] [globalMean Gradient[363 x 1]] [globalPrior Gradient[132 x 1]] [labels Gradient[132 x *]] [logPrior Gradient[132 x 1]] [scaledLogLikelihood Gradient[132 x 1 x *]] } -0x26a6618: {[features Value[363 x *]] } -0x273c4a8: {[scaledLogLikelihood Value[132 x 1 x *]] } -0x273c668: {[ce Value[1]] } -0x273c968: {[logPrior Value[132 x 1]] } -0x2f6ef18: {[globalMean Value[363 x 1]] } -0x2f73ed8: {[labels Value[132 x *]] } -0x33c7658: {[globalInvStd Value[363 x 1]] } -0x33c8048: {[globalPrior Value[132 x 1]] } -0x33c8a78: {[HL1.W Value[512 x 363]] } -0x33c9ed8: {[HL1.b Value[512 x 1]] } -0x33cb178: {[OL.W Value[132 x 512]] } -0x33cb9a8: {[OL.b Value[132 x 1]] } -0x34c15e8: {[err Value[1]] } -0x34c2bb8: {[featNorm Value[363 x *]] } -0x34c3128: {[HL1.t Value[512 x *]] } -0x34c34d8: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *]] } -0x34c3638: {[HL1.t Gradient[512 x *]] [HL1.y Value[512 x 1 x *]] } -0x34c37f8: {[HL1.z Gradient[512 x 1 x *]] [OL.t Value[132 x 1 x *]] } -0x34c39b8: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *]] } -0x34c44c8: {[ce Gradient[1]] } -0x34c4688: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *]] [OL.z Gradient[132 x 1 x *]] } -0x34c4848: {[OL.t Gradient[132 x 1 x *]] } -0x34c4a08: {[OL.b Gradient[132 x 1]] } +0x2d601b8: {[features Value[363 x *]] } +0x2e44598: {[scaledLogLikelihood Value[132 x 1 x *]] } +0x2e44758: {[ce Value[1]] } +0x2e449d8: {[featNorm Value[363 x *]] } +0x36ffd58: {[labels Value[132 x *]] } +0x3700918: {[globalMean Value[363 x 1]] } +0x3701688: {[globalInvStd Value[363 x 1]] } +0x3702068: {[globalPrior Value[132 x 1]] } +0x3702a08: {[HL1.W Value[512 x 363]] } +0x3702aa8: {[HL1.b Value[512 x 1]] } +0x3704e18: {[OL.W Value[132 x 512]] } +0x3705958: {[OL.b Value[132 x 1]] } +0x3b56d28: {[err Value[1]] } +0x3f154f8: {[HL1.t Value[512 x *]] } +0x3f15678: {[logPrior Value[132 x 1]] } +0x3f17308: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *]] } +0x3f17408: {[HL1.t Gradient[512 x *]] [HL1.y Value[512 x 1 x *]] } +0x3f17838: {[HL1.z Gradient[512 x 1 x *]] [OL.t Value[132 x 1 x *]] } +0x3f17998: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *]] } +0x3f18418: {[ce Gradient[1]] } +0x3f185d8: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *]] [OL.z Gradient[132 x 1 x *]] } +0x3f18798: {[OL.t Gradient[132 x 1 x *]] } +0x3f18958: {[OL.b Gradient[132 x 1]] } -05/03/2016 18:17:08: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 18:15:19: No PreCompute nodes found, skipping PreCompute step. 
-05/03/2016 18:17:08: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +05/03/2016 18:15:19: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:17:08: Starting minibatch loop. -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 1- 10, 3.12%]: ce = 3.74183846 * 2560; err = 0.80195313 * 2560; time = 0.2032s; samplesPerSecond = 12596.3 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 2.91124763 * 2560; err = 0.70898438 * 2560; time = 0.0132s; samplesPerSecond = 194632.4 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 2.58015976 * 2560; err = 0.66640625 * 2560; time = 0.0127s; samplesPerSecond = 200973.5 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 2.27427139 * 2560; err = 0.58750000 * 2560; time = 0.0129s; samplesPerSecond = 198326.6 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 41- 50, 15.62%]: ce = 2.05503616 * 2560; err = 0.56093750 * 2560; time = 0.0130s; samplesPerSecond = 197105.0 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.91055145 * 2560; err = 0.52812500 * 2560; time = 0.0130s; samplesPerSecond = 197668.1 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.81562653 * 2560; err = 0.51171875 * 2560; time = 0.0127s; samplesPerSecond = 201511.3 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.68803253 * 2560; err = 0.48476562 * 2560; time = 0.0148s; samplesPerSecond = 172902.9 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 81- 90, 28.12%]: ce = 1.57382050 * 2560; err = 0.45429687 * 2560; time = 0.0128s; samplesPerSecond = 200250.3 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.62090149 * 2560; err = 0.47304687 * 2560; time = 0.0127s; samplesPerSecond = 201352.8 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.59272461 * 2560; err = 0.47500000 * 2560; time = 0.0126s; samplesPerSecond = 202371.5 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.51520386 * 2560; err = 0.44531250 * 2560; time = 0.0123s; samplesPerSecond = 207421.8 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 121- 130, 40.62%]: ce = 1.49181976 * 2560; err = 0.45039062 * 2560; time = 0.0127s; samplesPerSecond = 201940.5 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.53703613 * 2560; err = 0.44804688 * 2560; time = 0.0125s; samplesPerSecond = 204882.0 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.43095398 * 2560; err = 0.41640625 * 2560; time = 0.0119s; samplesPerSecond = 214783.1 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.41503601 * 2560; err = 0.40078125 * 2560; time = 0.0121s; samplesPerSecond = 211570.2 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 161- 170, 53.12%]: ce = 1.38913574 * 2560; err = 0.41132812 * 2560; time = 0.0117s; samplesPerSecond = 219309.5 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.41207886 * 2560; err = 0.42226562 * 2560; time = 0.0113s; samplesPerSecond = 226890.0 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.39968262 * 2560; err = 0.40664062 * 2560; time = 0.0112s; samplesPerSecond = 
228164.0 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.42729187 * 2560; err = 0.42617187 * 2560; time = 0.0118s; samplesPerSecond = 217520.6 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 201- 210, 65.62%]: ce = 1.41336365 * 2560; err = 0.42343750 * 2560; time = 0.0117s; samplesPerSecond = 218784.7 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.33186951 * 2560; err = 0.39960937 * 2560; time = 0.0119s; samplesPerSecond = 215488.2 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 221- 230, 71.88%]: ce = 1.28581238 * 2560; err = 0.38710937 * 2560; time = 0.0118s; samplesPerSecond = 217594.6 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.34127502 * 2560; err = 0.40976563 * 2560; time = 0.0118s; samplesPerSecond = 216637.0 -05/03/2016 18:17:08: Epoch[ 1 of 2]-Minibatch[ 241- 250, 78.12%]: ce = 1.32666016 * 2560; err = 0.39726563 * 2560; time = 0.0121s; samplesPerSecond = 211465.4 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.21437378 * 2560; err = 0.37265625 * 2560; time = 0.0117s; samplesPerSecond = 218002.2 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.23749695 * 2560; err = 0.37343750 * 2560; time = 0.0117s; samplesPerSecond = 218467.3 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.29956665 * 2560; err = 0.39023438 * 2560; time = 0.0116s; samplesPerSecond = 220328.8 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 281- 290, 90.62%]: ce = 1.21198120 * 2560; err = 0.37382813 * 2560; time = 0.0118s; samplesPerSecond = 216088.5 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.20528259 * 2560; err = 0.36718750 * 2560; time = 0.0116s; samplesPerSecond = 220918.2 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.23613586 * 2560; err = 0.37343750 * 2560; time = 0.0119s; samplesPerSecond = 214945.4 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.25615234 * 2560; err = 0.38164063 * 2560; time = 0.0119s; samplesPerSecond = 215180.3 -05/03/2016 18:17:09: Finished Epoch[ 1 of 2]: [Training] ce = 1.62945061 * 81920; err = 0.46030273 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=0.626423s -05/03/2016 18:17:09: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech.1' +05/03/2016 18:15:19: Starting minibatch loop. 
+05/03/2016 18:15:19: Epoch[ 1 of 2]-Minibatch[ 1- 10, 3.12%]: ce = 3.74183846 * 2560; err = 0.80195313 * 2560; time = 0.1138s; samplesPerSecond = 22502.7 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 2.91124763 * 2560; err = 0.70898438 * 2560; time = 0.0078s; samplesPerSecond = 326280.9 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 2.58015976 * 2560; err = 0.66640625 * 2560; time = 0.0077s; samplesPerSecond = 331005.9 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 2.27427139 * 2560; err = 0.58750000 * 2560; time = 0.0077s; samplesPerSecond = 331005.9 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 41- 50, 15.62%]: ce = 2.05503616 * 2560; err = 0.56093750 * 2560; time = 0.0077s; samplesPerSecond = 333767.9 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.91055145 * 2560; err = 0.52812500 * 2560; time = 0.0077s; samplesPerSecond = 333203.2 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.81562653 * 2560; err = 0.51171875 * 2560; time = 0.0077s; samplesPerSecond = 333594.0 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.68803253 * 2560; err = 0.48476562 * 2560; time = 0.0077s; samplesPerSecond = 331348.7 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 81- 90, 28.12%]: ce = 1.57382050 * 2560; err = 0.45429687 * 2560; time = 0.0076s; samplesPerSecond = 335034.7 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.62090149 * 2560; err = 0.47304687 * 2560; time = 0.0076s; samplesPerSecond = 336178.6 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.59272461 * 2560; err = 0.47500000 * 2560; time = 0.0077s; samplesPerSecond = 331821.1 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.51520386 * 2560; err = 0.44531250 * 2560; time = 0.0076s; samplesPerSecond = 335210.2 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 121- 130, 40.62%]: ce = 1.49181976 * 2560; err = 0.45039062 * 2560; time = 0.0076s; samplesPerSecond = 335429.8 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.53703613 * 2560; err = 0.44804688 * 2560; time = 0.0078s; samplesPerSecond = 330152.2 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.43095398 * 2560; err = 0.41640625 * 2560; time = 0.0076s; samplesPerSecond = 334859.4 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.41503601 * 2560; err = 0.40078125 * 2560; time = 0.0077s; samplesPerSecond = 334640.5 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 161- 170, 53.12%]: ce = 1.38913574 * 2560; err = 0.41132812 * 2560; time = 0.0077s; samplesPerSecond = 334160.0 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.41207886 * 2560; err = 0.42226562 * 2560; time = 0.0077s; samplesPerSecond = 332467.5 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.39968262 * 2560; err = 0.40664062 * 2560; time = 0.0077s; samplesPerSecond = 334596.8 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.42729187 * 2560; err = 0.42617187 * 2560; time = 0.0077s; samplesPerSecond = 331177.2 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 201- 210, 65.62%]: ce = 1.41336365 * 2560; err = 0.42343750 * 2560; time = 0.0077s; samplesPerSecond = 334640.5 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.33186951 * 2560; err = 0.39960937 * 2560; time = 0.0077s; samplesPerSecond = 333594.0 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 221- 230, 
71.88%]: ce = 1.28581238 * 2560; err = 0.38710937 * 2560; time = 0.0076s; samplesPerSecond = 335166.3 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.34127502 * 2560; err = 0.40976563 * 2560; time = 0.0077s; samplesPerSecond = 333594.0 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 241- 250, 78.12%]: ce = 1.32666016 * 2560; err = 0.39726563 * 2560; time = 0.0077s; samplesPerSecond = 333203.2 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.21437378 * 2560; err = 0.37265625 * 2560; time = 0.0077s; samplesPerSecond = 330365.2 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.23749695 * 2560; err = 0.37343750 * 2560; time = 0.0077s; samplesPerSecond = 330963.2 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.29956665 * 2560; err = 0.39023438 * 2560; time = 0.0077s; samplesPerSecond = 334509.3 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 281- 290, 90.62%]: ce = 1.21198120 * 2560; err = 0.37382813 * 2560; time = 0.0077s; samplesPerSecond = 333767.9 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.20528259 * 2560; err = 0.36718750 * 2560; time = 0.0077s; samplesPerSecond = 331907.2 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.23613586 * 2560; err = 0.37343750 * 2560; time = 0.0077s; samplesPerSecond = 332597.1 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.25615234 * 2560; err = 0.38164063 * 2560; time = 0.0077s; samplesPerSecond = 331348.7 +05/03/2016 18:15:20: Finished Epoch[ 1 of 2]: [Training] ce = 1.62945061 * 81920; err = 0.46030273 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=0.43802s +05/03/2016 18:15:20: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech.1' -05/03/2016 18:17:09: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +05/03/2016 18:15:20: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses -05/03/2016 18:17:09: Starting minibatch loop. 
-05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 1- 10, 3.12%]: ce = 1.23230953 * 2560; err = 0.38320312 * 2560; time = 0.0127s; samplesPerSecond = 201400.4 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 1.20511341 * 2560; err = 0.37421875 * 2560; time = 0.0118s; samplesPerSecond = 217446.7 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 1.28783760 * 2560; err = 0.37421875 * 2560; time = 0.0121s; samplesPerSecond = 212148.8 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.22809334 * 2560; err = 0.37421875 * 2560; time = 0.0118s; samplesPerSecond = 216490.5 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 41- 50, 15.62%]: ce = 1.18090286 * 2560; err = 0.35468750 * 2560; time = 0.0121s; samplesPerSecond = 211797.8 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.28175354 * 2560; err = 0.37695312 * 2560; time = 0.0118s; samplesPerSecond = 217391.3 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.22251205 * 2560; err = 0.37382813 * 2560; time = 0.0117s; samplesPerSecond = 218766.0 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.17863007 * 2560; err = 0.36328125 * 2560; time = 0.0122s; samplesPerSecond = 210682.2 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 81- 90, 28.12%]: ce = 1.23061218 * 2560; err = 0.35742188 * 2560; time = 0.0120s; samplesPerSecond = 212695.2 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.18048782 * 2560; err = 0.37578125 * 2560; time = 0.0116s; samplesPerSecond = 220537.6 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.19648056 * 2560; err = 0.35976562 * 2560; time = 0.0115s; samplesPerSecond = 223463.7 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.18896942 * 2560; err = 0.35429688 * 2560; time = 0.0115s; samplesPerSecond = 221760.2 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 121- 130, 40.62%]: ce = 1.16628113 * 2560; err = 0.35937500 * 2560; time = 0.0119s; samplesPerSecond = 214891.3 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.12856445 * 2560; err = 0.35195312 * 2560; time = 0.0117s; samplesPerSecond = 219422.3 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.10083466 * 2560; err = 0.32617188 * 2560; time = 0.0118s; samplesPerSecond = 216362.4 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.09875183 * 2560; err = 0.33906250 * 2560; time = 0.0117s; samplesPerSecond = 218299.7 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 161- 170, 53.12%]: ce = 1.18634949 * 2560; err = 0.35820313 * 2560; time = 0.0120s; samplesPerSecond = 213582.5 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.15709991 * 2560; err = 0.35195312 * 2560; time = 0.0117s; samplesPerSecond = 218169.4 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.10971069 * 2560; err = 0.34960938 * 2560; time = 0.0118s; samplesPerSecond = 217465.2 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.11317139 * 2560; err = 0.35000000 * 2560; time = 0.0123s; samplesPerSecond = 208435.1 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 201- 210, 65.62%]: ce = 1.08727722 * 2560; err = 0.32578125 * 2560; time = 0.0119s; samplesPerSecond = 215815.2 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.12296143 * 2560; err = 0.34101562 * 2560; time = 0.0122s; samplesPerSecond = 210128.9 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 221- 230, 
71.88%]: ce = 1.12966003 * 2560; err = 0.35078125 * 2560; time = 0.0123s; samplesPerSecond = 208639.0 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.27489319 * 2560; err = 0.39257812 * 2560; time = 0.0118s; samplesPerSecond = 216453.9 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 241- 250, 78.12%]: ce = 1.17423401 * 2560; err = 0.35156250 * 2560; time = 0.0118s; samplesPerSecond = 216179.7 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.13240051 * 2560; err = 0.35625000 * 2560; time = 0.0126s; samplesPerSecond = 203174.6 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.13792114 * 2560; err = 0.34335938 * 2560; time = 0.0124s; samplesPerSecond = 206651.6 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.13433228 * 2560; err = 0.33710937 * 2560; time = 0.0124s; samplesPerSecond = 207203.6 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 281- 290, 90.62%]: ce = 1.05835876 * 2560; err = 0.33710937 * 2560; time = 0.0265s; samplesPerSecond = 96647.5 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.09596558 * 2560; err = 0.33476563 * 2560; time = 0.0195s; samplesPerSecond = 130966.4 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.08180847 * 2560; err = 0.33242187 * 2560; time = 0.0162s; samplesPerSecond = 158024.7 -05/03/2016 18:17:09: Epoch[ 2 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.06572876 * 2560; err = 0.33632812 * 2560; time = 0.0141s; samplesPerSecond = 181792.4 -05/03/2016 18:17:09: Finished Epoch[ 2 of 2]: [Training] ce = 1.16156273 * 81920; err = 0.35460205 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=0.417437s -05/03/2016 18:17:09: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech' -05/03/2016 18:17:09: CNTKCommandTrainEnd: dptPre1 +05/03/2016 18:15:20: Starting minibatch loop. 
+05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 1- 10, 3.12%]: ce = 1.23230953 * 2560; err = 0.38320312 * 2560; time = 0.0092s; samplesPerSecond = 278897.5 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 1.20511341 * 2560; err = 0.37421875 * 2560; time = 0.0081s; samplesPerSecond = 317106.4 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 1.28783760 * 2560; err = 0.37421875 * 2560; time = 0.0080s; samplesPerSecond = 318645.8 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.22809334 * 2560; err = 0.37421875 * 2560; time = 0.0080s; samplesPerSecond = 321406.2 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 41- 50, 15.62%]: ce = 1.18090286 * 2560; err = 0.35468750 * 2560; time = 0.0080s; samplesPerSecond = 321003.1 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.28175354 * 2560; err = 0.37695312 * 2560; time = 0.0080s; samplesPerSecond = 321769.7 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.22251205 * 2560; err = 0.37382813 * 2560; time = 0.0080s; samplesPerSecond = 320802.0 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.17863007 * 2560; err = 0.36328125 * 2560; time = 0.0079s; samplesPerSecond = 322053.1 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 81- 90, 28.12%]: ce = 1.23061218 * 2560; err = 0.35742188 * 2560; time = 0.0080s; samplesPerSecond = 320721.6 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.18048782 * 2560; err = 0.37578125 * 2560; time = 0.0079s; samplesPerSecond = 322174.7 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.19648056 * 2560; err = 0.35976562 * 2560; time = 0.0080s; samplesPerSecond = 320480.7 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.18896942 * 2560; err = 0.35429688 * 2560; time = 0.0080s; samplesPerSecond = 321931.6 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 121- 130, 40.62%]: ce = 1.16628113 * 2560; err = 0.35937500 * 2560; time = 0.0080s; samplesPerSecond = 321608.0 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.12856445 * 2560; err = 0.35195312 * 2560; time = 0.0079s; samplesPerSecond = 322906.2 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.10083466 * 2560; err = 0.32617188 * 2560; time = 0.0080s; samplesPerSecond = 320240.2 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.09875183 * 2560; err = 0.33906250 * 2560; time = 0.0080s; samplesPerSecond = 320761.8 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 161- 170, 53.12%]: ce = 1.18634949 * 2560; err = 0.35820313 * 2560; time = 0.0080s; samplesPerSecond = 319361.3 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.15709991 * 2560; err = 0.35195312 * 2560; time = 0.0079s; samplesPerSecond = 322377.5 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.10971069 * 2560; err = 0.34960938 * 2560; time = 0.0080s; samplesPerSecond = 320802.0 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.11317139 * 2560; err = 0.35000000 * 2560; time = 0.0079s; samplesPerSecond = 322987.6 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 201- 210, 65.62%]: ce = 1.08727722 * 2560; err = 0.32578125 * 2560; time = 0.0078s; samplesPerSecond = 326697.3 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.12296143 * 2560; err = 0.34101562 * 2560; time = 0.0076s; samplesPerSecond = 335385.8 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 221- 230, 
71.88%]: ce = 1.12966003 * 2560; err = 0.35078125 * 2560; time = 0.0077s; samplesPerSecond = 332899.9 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.27489319 * 2560; err = 0.39257812 * 2560; time = 0.0077s; samplesPerSecond = 331434.5 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 241- 250, 78.12%]: ce = 1.17423401 * 2560; err = 0.35156250 * 2560; time = 0.0077s; samplesPerSecond = 331950.2 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.13240051 * 2560; err = 0.35625000 * 2560; time = 0.0077s; samplesPerSecond = 331778.1 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.13792114 * 2560; err = 0.34335938 * 2560; time = 0.0077s; samplesPerSecond = 333463.6 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.13433228 * 2560; err = 0.33710937 * 2560; time = 0.0077s; samplesPerSecond = 331477.4 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 281- 290, 90.62%]: ce = 1.05835876 * 2560; err = 0.33710937 * 2560; time = 0.0077s; samplesPerSecond = 331907.2 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.09596558 * 2560; err = 0.33476563 * 2560; time = 0.0080s; samplesPerSecond = 318764.8 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.08180847 * 2560; err = 0.33242187 * 2560; time = 0.0079s; samplesPerSecond = 325203.3 +05/03/2016 18:15:20: Epoch[ 2 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.06572876 * 2560; err = 0.33632812 * 2560; time = 0.0079s; samplesPerSecond = 325741.2 +05/03/2016 18:15:20: Finished Epoch[ 2 of 2]: [Training] ce = 1.16156273 * 81920; err = 0.35460205 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=0.256937s +05/03/2016 18:15:20: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech' +05/03/2016 18:15:20: CNTKCommandTrainEnd: dptPre1 -05/03/2016 18:17:09: Action "train" complete. +05/03/2016 18:15:20: Action "train" complete. -05/03/2016 18:17:09: ############################################################################## -05/03/2016 18:17:09: # # -05/03/2016 18:17:09: # Action "edit" # -05/03/2016 18:17:09: # # -05/03/2016 18:17:09: ############################################################################## +05/03/2016 18:15:20: ############################################################################## +05/03/2016 18:15:20: # # +05/03/2016 18:15:20: # Action "edit" # +05/03/2016 18:15:20: # # +05/03/2016 18:15:20: ############################################################################## Post-processing network... @@ -624,26 +622,25 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:17:09: Action "edit" complete. +05/03/2016 18:15:20: Action "edit" complete. 
-05/03/2016 18:17:09: ############################################################################## -05/03/2016 18:17:09: # # -05/03/2016 18:17:09: # Action "train" # -05/03/2016 18:17:09: # # -05/03/2016 18:17:09: ############################################################################## +05/03/2016 18:15:20: ############################################################################## +05/03/2016 18:15:20: # # +05/03/2016 18:15:20: # Action "train" # +05/03/2016 18:15:20: # # +05/03/2016 18:15:20: ############################################################################## -05/03/2016 18:17:09: CNTKCommandTrainBegin: dptPre2 +05/03/2016 18:15:20: CNTKCommandTrainBegin: dptPre2 NDLBuilder Using GPU 0 -Reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.scp ... 948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:17:09: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0'. +05/03/2016 18:15:20: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0'. Post-processing network... @@ -690,14 +687,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:17:09: Loaded model with 24 nodes on GPU 0. +05/03/2016 18:15:20: Loaded model with 24 nodes on GPU 0. -05/03/2016 18:17:09: Training criterion node(s): -05/03/2016 18:17:09: ce = CrossEntropyWithSoftmax +05/03/2016 18:15:20: Training criterion node(s): +05/03/2016 18:15:20: ce = CrossEntropyWithSoftmax -05/03/2016 18:17:09: Evaluation criterion node(s): +05/03/2016 18:15:20: Evaluation criterion node(s): -05/03/2016 18:17:09: err = ErrorPrediction +05/03/2016 18:15:20: err = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -705,122 +702,125 @@ Allocating matrices for forward and/or backward propagation. 
Memory Sharing Structure: (nil): {[err Gradient[1]] [featNorm Gradient[363 x *3]] [features Gradient[363 x *3]] [globalInvStd Gradient[363 x 1]] [globalMean Gradient[363 x 1]] [globalPrior Gradient[132 x 1]] [labels Gradient[132 x *3]] [logPrior Gradient[132 x 1]] [scaledLogLikelihood Gradient[132 x 1 x *3]] } -0x7f09fe822b88: {[HL1.b Value[512 x 1]] } -0x7f09fe8250e8: {[HL1.t Gradient[512 x *3]] [HL1.y Value[512 x 1 x *3]] } -0x7f09fe8252f8: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *3]] } -0x7f09fe82d588: {[globalPrior Value[132 x 1]] } -0x7f09fe8b3ee8: {[globalMean Value[363 x 1]] } -0x7f09fe8c77d8: {[HL2.b Value[512 x 1]] } -0x7f0a1061b298: {[OL.b Value[132 x 1]] } -0x7f0a1062b958: {[labels Value[132 x *3]] } -0x7f0a10644d48: {[features Value[363 x *3]] } -0x7f0a10647f68: {[OL.t Gradient[132 x 1 x *3]] } -0x7f0a1066e278: {[logPrior Value[132 x 1]] } -0x7f0a1067c5a8: {[ce Gradient[1]] } -0x7f0a1067c768: {[HL2.b Gradient[512 x 1]] [HL2.y Gradient[512 x 1 x *3]] [OL.z Gradient[132 x 1 x *3]] } -0x7f0a10688948: {[OL.b Gradient[132 x 1]] } -0x7f0a1068a938: {[HL1.z Gradient[512 x 1 x *3]] [HL2.t Value[512 x 1 x *3]] } -0x7f0a1068aaf8: {[HL2.W Gradient[512 x 512]] [HL2.z Value[512 x 1 x *3]] } -0x7f0a1068acb8: {[HL2.t Gradient[512 x 1 x *3]] [HL2.y Value[512 x 1 x *3]] } -0x7f0a1068c9b8: {[HL1.W Value[512 x 363]] } -0x7f0a1068dea8: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *3]] [HL2.z Gradient[512 x 1 x *3]] [OL.t Value[132 x 1 x *3]] } -0x7f0a1068e068: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *3]] } -0x7f0a1069a978: {[globalInvStd Value[363 x 1]] } -0x7f0a106a1318: {[OL.W Value[132 x 512]] } -0x7f0a106a18b8: {[HL2.W Value[512 x 512]] } -0x7f0a106a2b58: {[HL1.t Value[512 x *3]] } -0x7f0a106a6ec8: {[err Value[1]] } -0x7f0a106a7088: {[scaledLogLikelihood Value[132 x 1 x *3]] } -0x7f0a106a7248: {[ce Value[1]] } -0x7f0a106aa8f8: {[featNorm Value[363 x *3]] } +0x7f1df49781d8: {[features Value[363 x *3]] } +0x7f1df49798e8: {[HL2.W Value[512 x 512]] } +0x7f1df497ba28: {[labels Value[132 x *3]] } +0x7f1df49809c8: {[globalMean Value[363 x 1]] } +0x7f1df4be42e8: {[globalInvStd Value[363 x 1]] } +0x7f1dfb32fde8: {[scaledLogLikelihood Value[132 x 1 x *3]] } +0x7f1dfb32ffa8: {[ce Value[1]] } +0x7f1dfb34da38: {[HL1.W Value[512 x 363]] } +0x7f1dfb35ed68: {[HL1.t Value[512 x *3]] } +0x7f1dfb35ee58: {[logPrior Value[132 x 1]] } +0x7f1dfb387ef8: {[globalPrior Value[132 x 1]] } +0x7f1dfb394ab8: {[ce Gradient[1]] } +0x7f1dfb394c78: {[HL2.b Gradient[512 x 1]] [HL2.y Gradient[512 x 1 x *3]] [OL.z Gradient[132 x 1 x *3]] } +0x7f1dfb394e38: {[OL.t Gradient[132 x 1 x *3]] } +0x7f1dfb394ff8: {[OL.b Gradient[132 x 1]] } +0x7f1dfb396b58: {[OL.b Value[132 x 1]] } +0x7f1dfb396d18: {[OL.W Value[132 x 512]] } +0x7f1dfb3987d8: {[HL2.b Value[512 x 1]] } +0x7f1dfb39bb78: {[featNorm Value[363 x *3]] } +0x7f1dfb39c338: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *3]] } +0x7f1dfb39f348: {[err Value[1]] } +0x7f1dfb39f9f8: {[HL1.t Gradient[512 x *3]] [HL1.y Value[512 x 1 x *3]] } +0x7f1dfb39fbb8: {[HL1.z Gradient[512 x 1 x *3]] [HL2.t Value[512 x 1 x *3]] } +0x7f1dfb39fd78: {[HL2.W Gradient[512 x 512]] [HL2.z Value[512 x 1 x *3]] } +0x7f1dfb39ff38: {[HL2.t Gradient[512 x 1 x *3]] [HL2.y Value[512 x 1 x *3]] } +0x7f1dfb3a00f8: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *3]] [HL2.z Gradient[512 x 1 x *3]] [OL.t Value[132 x 1 x *3]] } +0x7f1dfb3a02b8: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *3]] } +0x7f1dfb3a6468: {[HL1.b Value[512 x 1]] } -05/03/2016 18:17:09: No PreCompute 
nodes found, skipping PreCompute step. +05/03/2016 18:15:20: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 18:17:09: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +05/03/2016 18:15:20: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:17:09: Starting minibatch loop. -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 1- 10, 3.12%]: ce = 4.30124588 * 2560; err = 0.80703125 * 2560; time = 0.1030s; samplesPerSecond = 24849.8 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 2.75448074 * 2560; err = 0.69960937 * 2560; time = 0.0143s; samplesPerSecond = 178995.9 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 2.20926208 * 2560; err = 0.58515625 * 2560; time = 0.0142s; samplesPerSecond = 179939.6 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.88578110 * 2560; err = 0.50117188 * 2560; time = 0.0142s; samplesPerSecond = 180497.8 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 41- 50, 15.62%]: ce = 1.71906204 * 2560; err = 0.47773437 * 2560; time = 0.0142s; samplesPerSecond = 179674.3 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.60130463 * 2560; err = 0.44648437 * 2560; time = 0.0142s; samplesPerSecond = 180269.0 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.56077118 * 2560; err = 0.45000000 * 2560; time = 0.0142s; samplesPerSecond = 180485.1 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.47116547 * 2560; err = 0.42460938 * 2560; time = 0.0142s; samplesPerSecond = 180434.2 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 81- 90, 28.12%]: ce = 1.38874512 * 2560; err = 0.40781250 * 2560; time = 0.0142s; samplesPerSecond = 179926.9 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.41911163 * 2560; err = 0.42539063 * 2560; time = 0.0143s; samplesPerSecond = 179171.3 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.38730774 * 2560; err = 0.42148438 * 2560; time = 0.0141s; samplesPerSecond = 181021.1 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.36617889 * 2560; err = 0.41015625 * 2560; time = 0.0143s; samplesPerSecond = 179636.5 -05/03/2016 18:17:09: Epoch[ 1 of 2]-Minibatch[ 121- 130, 40.62%]: ce = 1.33381653 * 2560; err = 0.40781250 * 2560; time = 0.0143s; samplesPerSecond = 178970.9 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.39802246 * 2560; err = 0.40546875 * 2560; time = 0.0145s; samplesPerSecond = 176527.4 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.33336182 * 2560; err = 0.40195313 * 2560; time = 0.0143s; samplesPerSecond = 178471.8 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.33834229 * 2560; err = 0.40195313 * 2560; time = 0.0145s; samplesPerSecond = 176090.2 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 161- 170, 53.12%]: ce = 1.26663208 * 2560; err = 0.37578125 * 2560; time = 0.0144s; samplesPerSecond = 178148.9 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.28086243 * 2560; err = 0.39296875 * 2560; time = 0.0144s; samplesPerSecond = 177248.5 -05/03/2016 18:17:10: Epoch[ 1 of 
2]-Minibatch[ 181- 190, 59.38%]: ce = 1.29481506 * 2560; err = 0.39531250 * 2560; time = 0.0144s; samplesPerSecond = 178372.4 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.27625122 * 2560; err = 0.39375000 * 2560; time = 0.0144s; samplesPerSecond = 177322.2 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 201- 210, 65.62%]: ce = 1.26905518 * 2560; err = 0.38984375 * 2560; time = 0.0164s; samplesPerSecond = 155793.6 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.21494751 * 2560; err = 0.36250000 * 2560; time = 0.0141s; samplesPerSecond = 181174.8 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 221- 230, 71.88%]: ce = 1.20699158 * 2560; err = 0.36914062 * 2560; time = 0.0141s; samplesPerSecond = 181046.7 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.25002136 * 2560; err = 0.37851563 * 2560; time = 0.0142s; samplesPerSecond = 179851.1 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 241- 250, 78.12%]: ce = 1.22617187 * 2560; err = 0.37656250 * 2560; time = 0.0142s; samplesPerSecond = 180154.8 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.14840393 * 2560; err = 0.35468750 * 2560; time = 0.0142s; samplesPerSecond = 180803.7 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.16649780 * 2560; err = 0.35468750 * 2560; time = 0.0142s; samplesPerSecond = 180218.2 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.22885742 * 2560; err = 0.36992188 * 2560; time = 0.0143s; samplesPerSecond = 179586.1 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 281- 290, 90.62%]: ce = 1.16533203 * 2560; err = 0.36484375 * 2560; time = 0.0144s; samplesPerSecond = 178285.4 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.17502136 * 2560; err = 0.35664062 * 2560; time = 0.0145s; samplesPerSecond = 176466.5 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.16159058 * 2560; err = 0.35195312 * 2560; time = 0.0145s; samplesPerSecond = 176296.4 -05/03/2016 18:17:10: Epoch[ 1 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.17113953 * 2560; err = 0.35429688 * 2560; time = 0.0144s; samplesPerSecond = 178074.6 -05/03/2016 18:17:10: Finished Epoch[ 1 of 2]: [Training] ce = 1.49907970 * 81920; err = 0.42547607 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=0.591209s -05/03/2016 18:17:10: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.1' +05/03/2016 18:15:20: Starting minibatch loop. 
+05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 1- 10, 3.12%]: ce = 4.30124588 * 2560; err = 0.80703125 * 2560; time = 0.0151s; samplesPerSecond = 169930.3 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 2.75448074 * 2560; err = 0.69960937 * 2560; time = 0.0120s; samplesPerSecond = 212978.4 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 2.20926208 * 2560; err = 0.58515625 * 2560; time = 0.0120s; samplesPerSecond = 212889.8 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.88578110 * 2560; err = 0.50117188 * 2560; time = 0.0120s; samplesPerSecond = 213049.3 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 41- 50, 15.62%]: ce = 1.71906204 * 2560; err = 0.47773437 * 2560; time = 0.0120s; samplesPerSecond = 213155.7 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.60130463 * 2560; err = 0.44648437 * 2560; time = 0.0120s; samplesPerSecond = 213102.5 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.56077118 * 2560; err = 0.45000000 * 2560; time = 0.0120s; samplesPerSecond = 213120.2 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.47116547 * 2560; err = 0.42460938 * 2560; time = 0.0120s; samplesPerSecond = 213244.5 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 81- 90, 28.12%]: ce = 1.38874512 * 2560; err = 0.40781250 * 2560; time = 0.0120s; samplesPerSecond = 212819.0 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.41911163 * 2560; err = 0.42539063 * 2560; time = 0.0120s; samplesPerSecond = 212483.4 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.38730774 * 2560; err = 0.42148438 * 2560; time = 0.0120s; samplesPerSecond = 213138.0 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.36617889 * 2560; err = 0.41015625 * 2560; time = 0.0120s; samplesPerSecond = 213404.5 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 121- 130, 40.62%]: ce = 1.33381653 * 2560; err = 0.40781250 * 2560; time = 0.0120s; samplesPerSecond = 212819.0 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.39802246 * 2560; err = 0.40546875 * 2560; time = 0.0120s; samplesPerSecond = 213707.3 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.33336182 * 2560; err = 0.40195313 * 2560; time = 0.0119s; samplesPerSecond = 214603.1 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.33834229 * 2560; err = 0.40195313 * 2560; time = 0.0119s; samplesPerSecond = 214225.9 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 161- 170, 53.12%]: ce = 1.26663208 * 2560; err = 0.37578125 * 2560; time = 0.0120s; samplesPerSecond = 213868.0 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.28086243 * 2560; err = 0.39296875 * 2560; time = 0.0119s; samplesPerSecond = 214441.3 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.29481506 * 2560; err = 0.39531250 * 2560; time = 0.0119s; samplesPerSecond = 214315.6 +05/03/2016 18:15:20: Epoch[ 1 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.27625122 * 2560; err = 0.39375000 * 2560; time = 0.0119s; samplesPerSecond = 214225.9 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 201- 210, 65.62%]: ce = 1.26905518 * 2560; err = 0.38984375 * 2560; time = 0.0120s; samplesPerSecond = 213653.8 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.21494751 * 2560; err = 0.36250000 * 2560; time = 0.0120s; samplesPerSecond = 214046.8 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 221- 230, 
71.88%]: ce = 1.20699158 * 2560; err = 0.36914062 * 2560; time = 0.0120s; samplesPerSecond = 214154.3 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.25002136 * 2560; err = 0.37851563 * 2560; time = 0.0120s; samplesPerSecond = 214172.2 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 241- 250, 78.12%]: ce = 1.22617187 * 2560; err = 0.37656250 * 2560; time = 0.0120s; samplesPerSecond = 214136.3 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.14840393 * 2560; err = 0.35468750 * 2560; time = 0.0119s; samplesPerSecond = 214441.3 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.16649780 * 2560; err = 0.35468750 * 2560; time = 0.0119s; samplesPerSecond = 214585.1 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.22885742 * 2560; err = 0.36992188 * 2560; time = 0.0119s; samplesPerSecond = 214243.9 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 281- 290, 90.62%]: ce = 1.16533203 * 2560; err = 0.36484375 * 2560; time = 0.0120s; samplesPerSecond = 212907.5 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.17502136 * 2560; err = 0.35664062 * 2560; time = 0.0120s; samplesPerSecond = 212766.0 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.16159058 * 2560; err = 0.35195312 * 2560; time = 0.0120s; samplesPerSecond = 212978.4 +05/03/2016 18:15:21: Epoch[ 1 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.17113953 * 2560; err = 0.35429688 * 2560; time = 0.0120s; samplesPerSecond = 213084.7 +05/03/2016 18:15:21: Finished Epoch[ 1 of 2]: [Training] ce = 1.49907970 * 81920; err = 0.42547607 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=0.471826s +05/03/2016 18:15:21: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.1' -05/03/2016 18:17:10: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +05/03/2016 18:15:21: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses -05/03/2016 18:17:10: Starting minibatch loop. 
-05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 1- 10, 3.12%]: ce = 1.14215403 * 2560; err = 0.34882812 * 2560; time = 0.0171s; samplesPerSecond = 149393.1 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 1.17049246 * 2560; err = 0.36328125 * 2560; time = 0.0164s; samplesPerSecond = 156116.6 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 1.24373856 * 2560; err = 0.37460938 * 2560; time = 0.0143s; samplesPerSecond = 178633.7 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.18655586 * 2560; err = 0.36445312 * 2560; time = 0.0146s; samplesPerSecond = 175607.1 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 41- 50, 15.62%]: ce = 1.13848000 * 2560; err = 0.35039063 * 2560; time = 0.0145s; samplesPerSecond = 177162.6 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.21884155 * 2560; err = 0.36757812 * 2560; time = 0.0145s; samplesPerSecond = 176856.6 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.14372940 * 2560; err = 0.35000000 * 2560; time = 0.0144s; samplesPerSecond = 177975.5 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.12769089 * 2560; err = 0.34960938 * 2560; time = 0.0146s; samplesPerSecond = 175270.4 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 81- 90, 28.12%]: ce = 1.14114227 * 2560; err = 0.33554688 * 2560; time = 0.0145s; samplesPerSecond = 176710.2 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.12445145 * 2560; err = 0.34843750 * 2560; time = 0.0145s; samplesPerSecond = 176868.9 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.14137878 * 2560; err = 0.34101562 * 2560; time = 0.0144s; samplesPerSecond = 178273.0 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.12705154 * 2560; err = 0.33867188 * 2560; time = 0.0145s; samplesPerSecond = 177040.1 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 121- 130, 40.62%]: ce = 1.10779419 * 2560; err = 0.34531250 * 2560; time = 0.0145s; samplesPerSecond = 176187.2 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.07003021 * 2560; err = 0.32500000 * 2560; time = 0.0147s; samplesPerSecond = 174732.1 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.05308990 * 2560; err = 0.31406250 * 2560; time = 0.0146s; samplesPerSecond = 175330.5 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.06392975 * 2560; err = 0.33085938 * 2560; time = 0.0145s; samplesPerSecond = 176917.8 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 161- 170, 53.12%]: ce = 1.14430847 * 2560; err = 0.35507813 * 2560; time = 0.0145s; samplesPerSecond = 176698.0 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.14809570 * 2560; err = 0.35859375 * 2560; time = 0.0144s; samplesPerSecond = 177383.6 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.08184509 * 2560; err = 0.33515625 * 2560; time = 0.0145s; samplesPerSecond = 176430.0 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.07637024 * 2560; err = 0.33359375 * 2560; time = 0.0144s; samplesPerSecond = 177605.1 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 201- 210, 65.62%]: ce = 1.06249695 * 2560; err = 0.32500000 * 2560; time = 0.0145s; samplesPerSecond = 177015.6 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.09361877 * 2560; err = 0.33320312 * 2560; time = 0.0166s; samplesPerSecond = 154216.9 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 221- 230, 
71.88%]: ce = 1.12118683 * 2560; err = 0.34843750 * 2560; time = 0.0145s; samplesPerSecond = 177027.9 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.13457642 * 2560; err = 0.35195312 * 2560; time = 0.0145s; samplesPerSecond = 176698.0 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 241- 250, 78.12%]: ce = 1.09024963 * 2560; err = 0.33984375 * 2560; time = 0.0145s; samplesPerSecond = 176759.0 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.07457275 * 2560; err = 0.33164063 * 2560; time = 0.0142s; samplesPerSecond = 179775.3 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.05975952 * 2560; err = 0.32070312 * 2560; time = 0.0144s; samplesPerSecond = 177420.5 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.09778137 * 2560; err = 0.33242187 * 2560; time = 0.0144s; samplesPerSecond = 177260.8 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 281- 290, 90.62%]: ce = 1.01963196 * 2560; err = 0.32539062 * 2560; time = 0.0144s; samplesPerSecond = 178012.7 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.07533875 * 2560; err = 0.33515625 * 2560; time = 0.0144s; samplesPerSecond = 177666.7 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.06417236 * 2560; err = 0.33007812 * 2560; time = 0.0145s; samplesPerSecond = 177064.6 -05/03/2016 18:17:10: Epoch[ 2 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.04990234 * 2560; err = 0.33359375 * 2560; time = 0.0144s; samplesPerSecond = 177814.8 -05/03/2016 18:17:10: Finished Epoch[ 2 of 2]: [Training] ce = 1.11232681 * 81920; err = 0.34179688 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=0.475359s -05/03/2016 18:17:10: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech' -05/03/2016 18:17:10: CNTKCommandTrainEnd: dptPre2 +05/03/2016 18:15:21: Starting minibatch loop. 
+05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 1- 10, 3.12%]: ce = 1.14215403 * 2560; err = 0.34882812 * 2560; time = 0.0134s; samplesPerSecond = 190646.4 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 1.17049246 * 2560; err = 0.36328125 * 2560; time = 0.0121s; samplesPerSecond = 212025.8 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 1.24373856 * 2560; err = 0.37460938 * 2560; time = 0.0120s; samplesPerSecond = 213013.8 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.18655586 * 2560; err = 0.36445312 * 2560; time = 0.0120s; samplesPerSecond = 212907.5 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 41- 50, 15.62%]: ce = 1.13848000 * 2560; err = 0.35039063 * 2560; time = 0.0120s; samplesPerSecond = 212907.5 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.21884155 * 2560; err = 0.36757812 * 2560; time = 0.0120s; samplesPerSecond = 213262.2 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.14372940 * 2560; err = 0.35000000 * 2560; time = 0.0120s; samplesPerSecond = 212996.1 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.12769089 * 2560; err = 0.34960938 * 2560; time = 0.0120s; samplesPerSecond = 212501.0 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 81- 90, 28.12%]: ce = 1.14114227 * 2560; err = 0.33554688 * 2560; time = 0.0120s; samplesPerSecond = 212783.6 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.12445145 * 2560; err = 0.34843750 * 2560; time = 0.0120s; samplesPerSecond = 212766.0 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.14137878 * 2560; err = 0.34101562 * 2560; time = 0.0120s; samplesPerSecond = 213084.7 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.12705154 * 2560; err = 0.33867188 * 2560; time = 0.0120s; samplesPerSecond = 212854.4 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 121- 130, 40.62%]: ce = 1.10779419 * 2560; err = 0.34531250 * 2560; time = 0.0120s; samplesPerSecond = 212659.9 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.07003021 * 2560; err = 0.32500000 * 2560; time = 0.0121s; samplesPerSecond = 212078.5 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.05308990 * 2560; err = 0.31406250 * 2560; time = 0.0120s; samplesPerSecond = 213013.8 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.06392975 * 2560; err = 0.33085938 * 2560; time = 0.0120s; samplesPerSecond = 212571.6 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 161- 170, 53.12%]: ce = 1.14430847 * 2560; err = 0.35507813 * 2560; time = 0.0120s; samplesPerSecond = 212996.1 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.14809570 * 2560; err = 0.35859375 * 2560; time = 0.0120s; samplesPerSecond = 213067.0 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.08184509 * 2560; err = 0.33515625 * 2560; time = 0.0120s; samplesPerSecond = 212960.7 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.07637024 * 2560; err = 0.33359375 * 2560; time = 0.0120s; samplesPerSecond = 212554.0 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 201- 210, 65.62%]: ce = 1.06249695 * 2560; err = 0.32500000 * 2560; time = 0.0120s; samplesPerSecond = 212589.3 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.09361877 * 2560; err = 0.33320312 * 2560; time = 0.0120s; samplesPerSecond = 212872.1 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 221- 230, 
71.88%]: ce = 1.12118683 * 2560; err = 0.34843750 * 2560; time = 0.0120s; samplesPerSecond = 212483.4 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.13457642 * 2560; err = 0.35195312 * 2560; time = 0.0121s; samplesPerSecond = 212184.0 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 241- 250, 78.12%]: ce = 1.09024963 * 2560; err = 0.33984375 * 2560; time = 0.0120s; samplesPerSecond = 212712.9 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.07457275 * 2560; err = 0.33164063 * 2560; time = 0.0120s; samplesPerSecond = 212730.6 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.05975952 * 2560; err = 0.32070312 * 2560; time = 0.0120s; samplesPerSecond = 213351.1 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.09778137 * 2560; err = 0.33242187 * 2560; time = 0.0120s; samplesPerSecond = 213209.0 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 281- 290, 90.62%]: ce = 1.01963196 * 2560; err = 0.32539062 * 2560; time = 0.0120s; samplesPerSecond = 212518.7 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.07533875 * 2560; err = 0.33515625 * 2560; time = 0.0120s; samplesPerSecond = 212889.8 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.06417236 * 2560; err = 0.33007812 * 2560; time = 0.0120s; samplesPerSecond = 213386.7 +05/03/2016 18:15:21: Epoch[ 2 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.04990234 * 2560; err = 0.33359375 * 2560; time = 0.0134s; samplesPerSecond = 190632.2 +05/03/2016 18:15:21: Finished Epoch[ 2 of 2]: [Training] ce = 1.11232681 * 81920; err = 0.34179688 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=0.390966s +05/03/2016 18:15:21: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech' +05/03/2016 18:15:21: CNTKCommandTrainEnd: dptPre2 -05/03/2016 18:17:10: Action "train" complete. +05/03/2016 18:15:21: Action "train" complete. -05/03/2016 18:17:10: ############################################################################## -05/03/2016 18:17:10: # # -05/03/2016 18:17:10: # Action "edit" # -05/03/2016 18:17:10: # # -05/03/2016 18:17:10: ############################################################################## +05/03/2016 18:15:21: ############################################################################## +05/03/2016 18:15:21: # # +05/03/2016 18:15:21: # Action "edit" # +05/03/2016 18:15:21: # # +05/03/2016 18:15:21: ############################################################################## Post-processing network... @@ -920,26 +920,25 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:17:10: Action "edit" complete. +05/03/2016 18:15:21: Action "edit" complete. 
-05/03/2016 18:17:10: ############################################################################## -05/03/2016 18:17:10: # # -05/03/2016 18:17:10: # Action "train" # -05/03/2016 18:17:10: # # -05/03/2016 18:17:10: ############################################################################## +05/03/2016 18:15:21: ############################################################################## +05/03/2016 18:15:21: # # +05/03/2016 18:15:21: # Action "train" # +05/03/2016 18:15:21: # # +05/03/2016 18:15:21: ############################################################################## -05/03/2016 18:17:10: CNTKCommandTrainBegin: speechTrain +05/03/2016 18:15:21: CNTKCommandTrainBegin: speechTrain NDLBuilder Using GPU 0 -Reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.scp ... 948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:17:10: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0'. +05/03/2016 18:15:21: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0'. Post-processing network... @@ -991,14 +990,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:17:10: Loaded model with 29 nodes on GPU 0. +05/03/2016 18:15:21: Loaded model with 29 nodes on GPU 0. -05/03/2016 18:17:10: Training criterion node(s): -05/03/2016 18:17:10: ce = CrossEntropyWithSoftmax +05/03/2016 18:15:21: Training criterion node(s): +05/03/2016 18:15:21: ce = CrossEntropyWithSoftmax -05/03/2016 18:17:10: Evaluation criterion node(s): +05/03/2016 18:15:21: Evaluation criterion node(s): -05/03/2016 18:17:10: err = ErrorPrediction +05/03/2016 18:15:21: err = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -1006,147 +1005,152 @@ Allocating matrices for forward and/or backward propagation. 
Memory Sharing Structure: (nil): {[err Gradient[1]] [featNorm Gradient[363 x *6]] [features Gradient[363 x *6]] [globalInvStd Gradient[363 x 1]] [globalMean Gradient[363 x 1]] [globalPrior Gradient[132 x 1]] [labels Gradient[132 x *6]] [logPrior Gradient[132 x 1]] [scaledLogLikelihood Gradient[132 x 1 x *6]] } -0x7f09fe827098: {[HL3.b Value[512 x 1]] } -0x7f09fe89e7f8: {[HL2.W Gradient[512 x 512]] [HL2.z Value[512 x 1 x *6]] } -0x7f09fe89e9b8: {[HL2.t Gradient[512 x 1 x *6]] [HL2.y Value[512 x 1 x *6]] } -0x7f09fe89eb78: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *6]] [HL2.z Gradient[512 x 1 x *6]] [HL3.t Value[512 x 1 x *6]] } -0x7f09fe89edd8: {[HL3.W Gradient[512 x 512]] [HL3.z Value[512 x 1 x *6]] } -0x7f09fe89ef98: {[HL3.t Gradient[512 x 1 x *6]] [HL3.y Value[512 x 1 x *6]] } -0x7f09fe89f158: {[HL2.b Gradient[512 x 1]] [HL2.y Gradient[512 x 1 x *6]] [HL3.z Gradient[512 x 1 x *6]] [OL.t Value[132 x 1 x *6]] } -0x7f09fe89f318: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *6]] } -0x7f09fe8b0138: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *6]] } -0x7f09fe8df7b8: {[globalInvStd Value[363 x 1]] } -0x7f09fe8fcda8: {[globalPrior Value[132 x 1]] } -0x7f09fe8fd7b8: {[OL.b Gradient[132 x 1]] } -0x7f0a09400cb8: {[labels Value[132 x *6]] } -0x7f0a09401948: {[features Value[363 x *6]] } -0x7f0a10602098: {[ce Value[1]] } -0x7f0a10602368: {[HL1.t Gradient[512 x *6]] [HL1.y Value[512 x 1 x *6]] } -0x7f0a10602528: {[HL1.z Gradient[512 x 1 x *6]] [HL2.t Value[512 x 1 x *6]] } -0x7f0a10602e28: {[HL1.W Value[512 x 363]] } -0x7f0a10621db8: {[HL2.b Value[512 x 1]] } -0x7f0a10622178: {[err Value[1]] } -0x7f0a10622338: {[scaledLogLikelihood Value[132 x 1 x *6]] } -0x7f0a10628688: {[HL1.b Value[512 x 1]] } -0x7f0a10629088: {[globalMean Value[363 x 1]] } -0x7f0a1062a238: {[HL2.W Value[512 x 512]] } -0x7f0a10640818: {[HL1.t Value[512 x *6]] } -0x7f0a10644918: {[ce Gradient[1]] } -0x7f0a10644ad8: {[HL3.b Gradient[512 x 1]] [HL3.y Gradient[512 x 1 x *6]] [OL.z Gradient[132 x 1 x *6]] } -0x7f0a10644c98: {[OL.t Gradient[132 x 1 x *6]] } -0x7f0a106add08: {[OL.W Value[132 x 512]] } -0x7f0a106addb8: {[OL.b Value[132 x 1]] } -0x7f0a106b1c08: {[featNorm Value[363 x *6]] } -0x7f0a106b1cc8: {[logPrior Value[132 x 1]] } -0x7f0a106b28a8: {[HL3.W Value[512 x 512]] } +0x7f1df4980698: {[globalPrior Value[132 x 1]] } +0x7f1df4982b58: {[HL2.t Gradient[512 x 1 x *6]] [HL2.y Value[512 x 1 x *6]] } +0x7f1df4982d18: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *6]] [HL2.z Gradient[512 x 1 x *6]] [HL3.t Value[512 x 1 x *6]] } +0x7f1df4982ed8: {[HL3.W Gradient[512 x 512]] [HL3.z Value[512 x 1 x *6]] } +0x7f1df4983098: {[HL3.t Gradient[512 x 1 x *6]] [HL3.y Value[512 x 1 x *6]] } +0x7f1df4983258: {[HL2.b Gradient[512 x 1]] [HL2.y Gradient[512 x 1 x *6]] [HL3.z Gradient[512 x 1 x *6]] [OL.t Value[132 x 1 x *6]] } +0x7f1df4983418: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *6]] } +0x7f1df4998338: {[logPrior Value[132 x 1]] } +0x7f1df49a7708: {[HL1.b Value[512 x 1]] } +0x7f1df49c7058: {[HL3.b Gradient[512 x 1]] [HL3.y Gradient[512 x 1 x *6]] [OL.z Gradient[132 x 1 x *6]] } +0x7f1df49c7218: {[OL.t Gradient[132 x 1 x *6]] } +0x7f1df49c73d8: {[OL.b Gradient[132 x 1]] } +0x7f1df49c7d58: {[HL1.W Value[512 x 363]] } +0x7f1df49dc578: {[HL3.b Value[512 x 1]] } +0x7f1df49dd138: {[globalMean Value[363 x 1]] } +0x7f1df49e4b28: {[ce Gradient[1]] } +0x7f1dfb303dd8: {[OL.W Value[132 x 512]] } +0x7f1dfb305b88: {[scaledLogLikelihood Value[132 x 1 x *6]] } +0x7f1dfb305d48: {[ce Value[1]] } +0x7f1dfb320af8: {[err 
Value[1]] } +0x7f1dfb33ad78: {[labels Value[132 x *6]] } +0x7f1dfb33ae28: {[globalInvStd Value[363 x 1]] } +0x7f1dfb352a38: {[HL2.b Value[512 x 1]] } +0x7f1dfb3a1c78: {[features Value[363 x *6]] } +0x7f1dfb3a7588: {[featNorm Value[363 x *6]] } +0x7f1dfb3a8bb8: {[HL2.W Value[512 x 512]] } +0x7f1dfb3a9d38: {[HL3.W Value[512 x 512]] } +0x7f1dfb3a9f88: {[HL1.t Value[512 x *6]] } +0x7f1dfb3aa288: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *6]] } +0x7f1dfb3aa448: {[HL1.t Gradient[512 x *6]] [HL1.y Value[512 x 1 x *6]] } +0x7f1dfb3aa608: {[HL1.z Gradient[512 x 1 x *6]] [HL2.t Value[512 x 1 x *6]] } +0x7f1dfb3aa7c8: {[HL2.W Gradient[512 x 512]] [HL2.z Value[512 x 1 x *6]] } +0x7f1dfb3ac6d8: {[OL.b Value[132 x 1]] } -05/03/2016 18:17:10: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 18:15:21: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 18:17:10: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900117 momentum as time constant = 2432.7 samples +05/03/2016 18:15:21: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900117 momentum as time constant = 2432.7 samples +minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:17:11: Starting minibatch loop. -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: ce = 3.97086372 * 2560; err = 0.81445312 * 2560; time = 0.1083s; samplesPerSecond = 23627.4 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: ce = 2.63975792 * 2560; err = 0.63320312 * 2560; time = 0.0185s; samplesPerSecond = 138625.7 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: ce = 2.02565231 * 2560; err = 0.54257813 * 2560; time = 0.0186s; samplesPerSecond = 137597.4 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: ce = 1.74204865 * 2560; err = 0.47500000 * 2560; time = 0.0189s; samplesPerSecond = 135571.7 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: ce = 1.58343964 * 2560; err = 0.45156250 * 2560; time = 0.0187s; samplesPerSecond = 136876.4 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: ce = 1.47893143 * 2560; err = 0.42343750 * 2560; time = 0.0185s; samplesPerSecond = 138468.2 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: ce = 1.43405457 * 2560; err = 0.40898438 * 2560; time = 0.0187s; samplesPerSecond = 137162.5 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: ce = 1.35973663 * 2560; err = 0.39648438 * 2560; time = 0.0186s; samplesPerSecond = 137412.8 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: ce = 1.28108978 * 2560; err = 0.37968750 * 2560; time = 0.0187s; samplesPerSecond = 136854.5 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: ce = 1.29773560 * 2560; err = 0.39765625 * 2560; time = 0.0186s; samplesPerSecond = 137471.8 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: ce = 1.28441925 * 2560; err = 0.39062500 * 2560; time = 0.0187s; samplesPerSecond = 136979.0 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: ce = 1.27777252 * 2560; err = 0.38164063 * 2560; time = 0.0187s; samplesPerSecond = 136869.1 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: ce = 1.23615112 * 2560; err = 0.37421875 * 2560; time = 0.0187s; samplesPerSecond = 137199.2 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: ce = 
1.31171112 * 2560; err = 0.38671875 * 2560; time = 0.0187s; samplesPerSecond = 137199.2 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: ce = 1.25573883 * 2560; err = 0.37773438 * 2560; time = 0.0187s; samplesPerSecond = 137074.3 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: ce = 1.27382965 * 2560; err = 0.38398437 * 2560; time = 0.0187s; samplesPerSecond = 137045.0 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: ce = 1.20634155 * 2560; err = 0.36406250 * 2560; time = 0.0187s; samplesPerSecond = 137199.2 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: ce = 1.20973816 * 2560; err = 0.36562500 * 2560; time = 0.0187s; samplesPerSecond = 137169.8 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: ce = 1.20688782 * 2560; err = 0.36718750 * 2560; time = 0.0187s; samplesPerSecond = 136942.3 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: ce = 1.20260315 * 2560; err = 0.37226562 * 2560; time = 0.0187s; samplesPerSecond = 137037.6 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: ce = 1.20553894 * 2560; err = 0.37187500 * 2560; time = 0.0188s; samplesPerSecond = 136322.5 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: ce = 1.14160156 * 2560; err = 0.34726563 * 2560; time = 0.0192s; samplesPerSecond = 133097.6 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: ce = 1.15316467 * 2560; err = 0.35273437 * 2560; time = 0.0191s; samplesPerSecond = 133702.4 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: ce = 1.19352417 * 2560; err = 0.35468750 * 2560; time = 0.0187s; samplesPerSecond = 137125.7 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: ce = 1.17192078 * 2560; err = 0.35937500 * 2560; time = 0.0184s; samplesPerSecond = 138791.0 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: ce = 1.08281860 * 2560; err = 0.33867188 * 2560; time = 0.0190s; samplesPerSecond = 134432.6 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: ce = 1.11028442 * 2560; err = 0.34453125 * 2560; time = 0.0190s; samplesPerSecond = 134467.9 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: ce = 1.17454224 * 2560; err = 0.35312500 * 2560; time = 0.0185s; samplesPerSecond = 138648.2 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: ce = 1.11068115 * 2560; err = 0.34531250 * 2560; time = 0.0183s; samplesPerSecond = 140274.0 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: ce = 1.12955627 * 2560; err = 0.34296875 * 2560; time = 0.0189s; samplesPerSecond = 135092.3 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: ce = 1.12482300 * 2560; err = 0.34570312 * 2560; time = 0.0191s; samplesPerSecond = 133835.2 -05/03/2016 18:17:11: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: ce = 1.12771912 * 2560; err = 0.34453125 * 2560; time = 0.0185s; samplesPerSecond = 138745.9 -05/03/2016 18:17:11: Finished Epoch[ 1 of 4]: [Training] ce = 1.40639620 * 81920; err = 0.40274658 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=0.730805s -05/03/2016 18:17:11: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.1' +05/03/2016 18:15:21: Starting minibatch loop. 
+05/03/2016 18:15:21: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: ce = 3.97086372 * 2560; err = 0.81445312 * 2560; time = 0.0195s; samplesPerSecond = 131592.5 +05/03/2016 18:15:21: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: ce = 2.63975792 * 2560; err = 0.63320312 * 2560; time = 0.0163s; samplesPerSecond = 157238.5 +05/03/2016 18:15:21: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: ce = 2.02565231 * 2560; err = 0.54257813 * 2560; time = 0.0162s; samplesPerSecond = 157664.6 +05/03/2016 18:15:21: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: ce = 1.74204865 * 2560; err = 0.47500000 * 2560; time = 0.0163s; samplesPerSecond = 157296.5 +05/03/2016 18:15:21: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: ce = 1.58343964 * 2560; err = 0.45156250 * 2560; time = 0.0163s; samplesPerSecond = 156978.2 +05/03/2016 18:15:21: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: ce = 1.47893143 * 2560; err = 0.42343750 * 2560; time = 0.0163s; samplesPerSecond = 157441.6 +05/03/2016 18:15:21: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: ce = 1.43405457 * 2560; err = 0.40898438 * 2560; time = 0.0162s; samplesPerSecond = 157713.2 +05/03/2016 18:15:21: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: ce = 1.35973663 * 2560; err = 0.39648438 * 2560; time = 0.0163s; samplesPerSecond = 157460.9 +05/03/2016 18:15:21: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: ce = 1.28108978 * 2560; err = 0.37968750 * 2560; time = 0.0163s; samplesPerSecond = 157055.2 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: ce = 1.29773560 * 2560; err = 0.39765625 * 2560; time = 0.0163s; samplesPerSecond = 157344.8 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: ce = 1.28441925 * 2560; err = 0.39062500 * 2560; time = 0.0163s; samplesPerSecond = 157277.1 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: ce = 1.27777252 * 2560; err = 0.38164063 * 2560; time = 0.0163s; samplesPerSecond = 157431.9 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: ce = 1.23615112 * 2560; err = 0.37421875 * 2560; time = 0.0163s; samplesPerSecond = 157431.9 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: ce = 1.31171112 * 2560; err = 0.38671875 * 2560; time = 0.0163s; samplesPerSecond = 157402.9 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: ce = 1.25573883 * 2560; err = 0.37773438 * 2560; time = 0.0163s; samplesPerSecond = 157219.2 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: ce = 1.27382965 * 2560; err = 0.38398437 * 2560; time = 0.0163s; samplesPerSecond = 157383.5 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: ce = 1.20634155 * 2560; err = 0.36406250 * 2560; time = 0.0163s; samplesPerSecond = 157344.8 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: ce = 1.20973816 * 2560; err = 0.36562500 * 2560; time = 0.0162s; samplesPerSecond = 157577.2 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: ce = 1.20688782 * 2560; err = 0.36718750 * 2560; time = 0.0163s; samplesPerSecond = 157238.5 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: ce = 1.20260315 * 2560; err = 0.37226562 * 2560; time = 0.0163s; samplesPerSecond = 157528.8 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: ce = 1.20553894 * 2560; err = 0.37187500 * 2560; time = 0.0163s; samplesPerSecond = 157431.9 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: ce = 1.14160156 * 2560; err = 0.34726563 * 2560; time = 0.0164s; samplesPerSecond = 156183.3 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 221- 230, 
71.88%]: ce = 1.15316467 * 2560; err = 0.35273437 * 2560; time = 0.0164s; samplesPerSecond = 156555.8 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: ce = 1.19352417 * 2560; err = 0.35468750 * 2560; time = 0.0163s; samplesPerSecond = 156853.1 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: ce = 1.17192078 * 2560; err = 0.35937500 * 2560; time = 0.0164s; samplesPerSecond = 156211.9 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: ce = 1.08281860 * 2560; err = 0.33867188 * 2560; time = 0.0164s; samplesPerSecond = 156431.4 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: ce = 1.11028442 * 2560; err = 0.34453125 * 2560; time = 0.0163s; samplesPerSecond = 156584.5 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: ce = 1.17454224 * 2560; err = 0.35312500 * 2560; time = 0.0164s; samplesPerSecond = 156250.0 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: ce = 1.11068115 * 2560; err = 0.34531250 * 2560; time = 0.0163s; samplesPerSecond = 156603.7 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: ce = 1.12955627 * 2560; err = 0.34296875 * 2560; time = 0.0164s; samplesPerSecond = 156278.6 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: ce = 1.12482300 * 2560; err = 0.34570312 * 2560; time = 0.0164s; samplesPerSecond = 156040.5 +05/03/2016 18:15:22: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: ce = 1.12771912 * 2560; err = 0.34453125 * 2560; time = 0.0164s; samplesPerSecond = 156565.3 +05/03/2016 18:15:22: Finished Epoch[ 1 of 4]: [Training] ce = 1.40639620 * 81920; err = 0.40274658 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=0.608651s +05/03/2016 18:15:22: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.1' -05/03/2016 18:17:11: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +05/03/2016 18:15:22: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses -05/03/2016 18:17:11: Starting minibatch loop. 
-05/03/2016 18:17:11: Epoch[ 2 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.51739798 * 5120; err = 0.41425781 * 5120; time = 0.0371s; samplesPerSecond = 138083.6 -05/03/2016 18:17:11: Epoch[ 2 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.25793447 * 5120; err = 0.37539062 * 5120; time = 0.0293s; samplesPerSecond = 174887.3 -05/03/2016 18:17:11: Epoch[ 2 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.18638287 * 5120; err = 0.36718750 * 5120; time = 0.0293s; samplesPerSecond = 174803.7 -05/03/2016 18:17:11: Epoch[ 2 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 1.12794571 * 5120; err = 0.34218750 * 5120; time = 0.0292s; samplesPerSecond = 175198.5 -05/03/2016 18:17:11: Epoch[ 2 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 1.14070625 * 5120; err = 0.34570312 * 5120; time = 0.0291s; samplesPerSecond = 176163.0 -05/03/2016 18:17:11: Epoch[ 2 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.14582825 * 5120; err = 0.34765625 * 5120; time = 0.0290s; samplesPerSecond = 176320.7 -05/03/2016 18:17:12: Epoch[ 2 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 1.11193542 * 5120; err = 0.34414062 * 5120; time = 0.0291s; samplesPerSecond = 175721.6 -05/03/2016 18:17:12: Epoch[ 2 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.08574600 * 5120; err = 0.33789062 * 5120; time = 0.0293s; samplesPerSecond = 174714.2 -05/03/2016 18:17:12: Epoch[ 2 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 1.21058807 * 5120; err = 0.37363281 * 5120; time = 0.0293s; samplesPerSecond = 174791.8 -05/03/2016 18:17:12: Epoch[ 2 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 1.09668579 * 5120; err = 0.34335938 * 5120; time = 0.0292s; samplesPerSecond = 175192.5 -05/03/2016 18:17:12: Epoch[ 2 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 1.05845032 * 5120; err = 0.32675781 * 5120; time = 0.0292s; samplesPerSecond = 175372.5 -05/03/2016 18:17:12: Epoch[ 2 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 1.10728455 * 5120; err = 0.34726563 * 5120; time = 0.0292s; samplesPerSecond = 175625.2 -05/03/2016 18:17:12: Epoch[ 2 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 1.08716888 * 5120; err = 0.33593750 * 5120; time = 0.0290s; samplesPerSecond = 176838.3 -05/03/2016 18:17:12: Epoch[ 2 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 1.06778870 * 5120; err = 0.31855469 * 5120; time = 0.0291s; samplesPerSecond = 176023.7 -05/03/2016 18:17:12: Epoch[ 2 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 1.04079590 * 5120; err = 0.32910156 * 5120; time = 0.0292s; samplesPerSecond = 175372.5 -05/03/2016 18:17:12: Epoch[ 2 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 1.06249542 * 5120; err = 0.32968750 * 5120; time = 0.0333s; samplesPerSecond = 153823.0 -05/03/2016 18:17:12: Finished Epoch[ 2 of 4]: [Training] ce = 1.14407091 * 81920; err = 0.34866943 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=0.48398s -05/03/2016 18:17:12: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.2' +05/03/2016 18:15:22: Starting minibatch loop. 
+05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.51739798 * 5120; err = 0.41425781 * 5120; time = 0.0291s; samplesPerSecond = 175999.5 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.25793447 * 5120; err = 0.37539062 * 5120; time = 0.0255s; samplesPerSecond = 200800.1 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.18638287 * 5120; err = 0.36718750 * 5120; time = 0.0256s; samplesPerSecond = 199812.7 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 1.12794571 * 5120; err = 0.34218750 * 5120; time = 0.0256s; samplesPerSecond = 200226.8 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 1.14070625 * 5120; err = 0.34570312 * 5120; time = 0.0255s; samplesPerSecond = 200776.4 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.14582825 * 5120; err = 0.34765625 * 5120; time = 0.0255s; samplesPerSecond = 200713.5 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 1.11193542 * 5120; err = 0.34414062 * 5120; time = 0.0255s; samplesPerSecond = 200886.7 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.08574600 * 5120; err = 0.33789062 * 5120; time = 0.0255s; samplesPerSecond = 200713.5 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 1.21058807 * 5120; err = 0.37363281 * 5120; time = 0.0255s; samplesPerSecond = 200886.7 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 1.09668579 * 5120; err = 0.34335938 * 5120; time = 0.0255s; samplesPerSecond = 200847.3 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 1.05845032 * 5120; err = 0.32675781 * 5120; time = 0.0255s; samplesPerSecond = 200517.0 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 1.10728455 * 5120; err = 0.34726563 * 5120; time = 0.0255s; samplesPerSecond = 201060.3 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 1.08716888 * 5120; err = 0.33593750 * 5120; time = 0.0255s; samplesPerSecond = 201091.9 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 1.06778870 * 5120; err = 0.31855469 * 5120; time = 0.0255s; samplesPerSecond = 200894.6 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 1.04079590 * 5120; err = 0.32910156 * 5120; time = 0.0255s; samplesPerSecond = 200430.6 +05/03/2016 18:15:22: Epoch[ 2 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 1.06249542 * 5120; err = 0.32968750 * 5120; time = 0.0255s; samplesPerSecond = 200682.0 +05/03/2016 18:15:22: Finished Epoch[ 2 of 4]: [Training] ce = 1.14407091 * 81920; err = 0.34866943 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=0.415118s +05/03/2016 18:15:22: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.2' -05/03/2016 18:17:12: Starting Epoch 3: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +05/03/2016 18:15:22: Starting Epoch 3: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +minibatchiterator: epoch 2: frames [163840..245760] (first utterance at frame 163840), data subset 0 of 1, with 1 datapasses -05/03/2016 18:17:12: Starting minibatch loop. 
-05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.11238871 * 5120; err = 0.34804687 * 5120; time = 0.0305s; samplesPerSecond = 167780.8 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.09456148 * 5120; err = 0.34121094 * 5120; time = 0.0291s; samplesPerSecond = 176187.2 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.10800076 * 5120; err = 0.34667969 * 5120; time = 0.0290s; samplesPerSecond = 176728.5 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 1.16617966 * 5120; err = 0.35566406 * 5120; time = 0.0290s; samplesPerSecond = 176765.1 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 1.14173546 * 5120; err = 0.34550781 * 5120; time = 0.0292s; samplesPerSecond = 175396.5 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.07876053 * 5120; err = 0.33359375 * 5120; time = 0.0297s; samplesPerSecond = 172297.8 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 1.08043213 * 5120; err = 0.33437500 * 5120; time = 0.0297s; samplesPerSecond = 172251.4 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.07423630 * 5120; err = 0.33007812 * 5120; time = 0.0297s; samplesPerSecond = 172292.0 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 1.02659454 * 5120; err = 0.31113281 * 5120; time = 0.0297s; samplesPerSecond = 172338.3 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 1.04602737 * 5120; err = 0.31855469 * 5120; time = 0.0298s; samplesPerSecond = 171817.8 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 1.05524902 * 5120; err = 0.33613281 * 5120; time = 0.0296s; samplesPerSecond = 172722.1 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 1.07627411 * 5120; err = 0.33613281 * 5120; time = 0.0296s; samplesPerSecond = 172862.0 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 1.05101776 * 5120; err = 0.31660156 * 5120; time = 0.0297s; samplesPerSecond = 172669.6 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 1.03016815 * 5120; err = 0.32480469 * 5120; time = 0.0298s; samplesPerSecond = 171864.0 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 1.04644623 * 5120; err = 0.32929687 * 5120; time = 0.0296s; samplesPerSecond = 173171.9 -05/03/2016 18:17:12: Epoch[ 3 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 1.02751465 * 5120; err = 0.32265625 * 5120; time = 0.0294s; samplesPerSecond = 174339.4 -05/03/2016 18:17:12: Finished Epoch[ 3 of 4]: [Training] ce = 1.07597418 * 81920; err = 0.33315430 * 81920; totalSamplesSeen = 245760; learningRatePerSample = 0.003125; epochTime=0.477846s -05/03/2016 18:17:12: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.3' +05/03/2016 18:15:22: Starting minibatch loop. 
+05/03/2016 18:15:22: Epoch[ 3 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.11238871 * 5120; err = 0.34804687 * 5120; time = 0.0263s; samplesPerSecond = 194432.8 +05/03/2016 18:15:22: Epoch[ 3 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.09456148 * 5120; err = 0.34121094 * 5120; time = 0.0255s; samplesPerSecond = 200564.1 +05/03/2016 18:15:22: Epoch[ 3 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.10800076 * 5120; err = 0.34667969 * 5120; time = 0.0255s; samplesPerSecond = 200697.7 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 1.16617966 * 5120; err = 0.35566406 * 5120; time = 0.0255s; samplesPerSecond = 200548.4 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 1.14173546 * 5120; err = 0.34550781 * 5120; time = 0.0255s; samplesPerSecond = 201068.2 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.07876053 * 5120; err = 0.33359375 * 5120; time = 0.0255s; samplesPerSecond = 200878.8 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 1.08043213 * 5120; err = 0.33437500 * 5120; time = 0.0256s; samplesPerSecond = 200367.9 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.07423630 * 5120; err = 0.33007812 * 5120; time = 0.0256s; samplesPerSecond = 200273.8 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 1.02659454 * 5120; err = 0.31113281 * 5120; time = 0.0256s; samplesPerSecond = 200367.9 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 1.04602737 * 5120; err = 0.31855469 * 5120; time = 0.0256s; samplesPerSecond = 200344.3 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 1.05524902 * 5120; err = 0.33613281 * 5120; time = 0.0256s; samplesPerSecond = 200070.3 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 1.07627411 * 5120; err = 0.33613281 * 5120; time = 0.0256s; samplesPerSecond = 200132.9 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 1.05101776 * 5120; err = 0.31660156 * 5120; time = 0.0255s; samplesPerSecond = 200697.7 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 1.03016815 * 5120; err = 0.32480469 * 5120; time = 0.0255s; samplesPerSecond = 200989.2 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 1.04644623 * 5120; err = 0.32929687 * 5120; time = 0.0255s; samplesPerSecond = 200603.4 +05/03/2016 18:15:23: Epoch[ 3 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 1.02751465 * 5120; err = 0.32265625 * 5120; time = 0.0255s; samplesPerSecond = 200414.9 +05/03/2016 18:15:23: Finished Epoch[ 3 of 4]: [Training] ce = 1.07597418 * 81920; err = 0.33315430 * 81920; totalSamplesSeen = 245760; learningRatePerSample = 0.003125; epochTime=0.412316s +05/03/2016 18:15:23: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.3' -05/03/2016 18:17:12: Starting Epoch 4: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +05/03/2016 18:15:23: Starting Epoch 4: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +minibatchiterator: epoch 3: frames [245760..327680] (first utterance at frame 245760), data subset 0 of 1, with 1 datapasses -05/03/2016 18:17:12: Starting minibatch loop. 
-05/03/2016 18:17:12: Epoch[ 4 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.03003817 * 5120; err = 0.31289062 * 5120; time = 0.0315s; samplesPerSecond = 162519.0 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.04547925 * 4926; err = 0.32947625 * 4926; time = 0.1533s; samplesPerSecond = 32130.6 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.01249580 * 5120; err = 0.32246094 * 5120; time = 0.0298s; samplesPerSecond = 171593.3 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 0.99796486 * 5120; err = 0.31425781 * 5120; time = 0.0319s; samplesPerSecond = 160491.5 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 0.99781761 * 5120; err = 0.31464844 * 5120; time = 0.0297s; samplesPerSecond = 172187.7 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.00107079 * 5120; err = 0.31855469 * 5120; time = 0.0297s; samplesPerSecond = 172106.6 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 1.02518806 * 5120; err = 0.31972656 * 5120; time = 0.0480s; samplesPerSecond = 106584.5 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.00891876 * 5120; err = 0.31660156 * 5120; time = 0.0300s; samplesPerSecond = 170740.7 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 0.99774780 * 5120; err = 0.30585937 * 5120; time = 0.0299s; samplesPerSecond = 171174.5 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 1.00037842 * 5120; err = 0.30722656 * 5120; time = 0.0300s; samplesPerSecond = 170894.5 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 1.02586746 * 5120; err = 0.31816406 * 5120; time = 0.0299s; samplesPerSecond = 171031.5 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 1.06024628 * 5120; err = 0.33574219 * 5120; time = 0.0300s; samplesPerSecond = 170541.6 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 0.98301010 * 5120; err = 0.30214844 * 5120; time = 0.0299s; samplesPerSecond = 171168.8 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 0.96488800 * 5120; err = 0.30156250 * 5120; time = 0.0299s; samplesPerSecond = 171065.8 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 0.99069977 * 5120; err = 0.31640625 * 5120; time = 0.0300s; samplesPerSecond = 170866.0 -05/03/2016 18:17:13: Epoch[ 4 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 0.97961731 * 5120; err = 0.29921875 * 5120; time = 0.0299s; samplesPerSecond = 171403.7 -05/03/2016 18:17:13: Finished Epoch[ 4 of 4]: [Training] ce = 1.00739784 * 81920; err = 0.31477051 * 81920; totalSamplesSeen = 327680; learningRatePerSample = 0.003125; epochTime=0.629907s -05/03/2016 18:17:13: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech' -05/03/2016 18:17:13: CNTKCommandTrainEnd: speechTrain +05/03/2016 18:15:23: Starting minibatch loop. 
+05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.03003817 * 5120; err = 0.31289062 * 5120; time = 0.0264s; samplesPerSecond = 193770.6 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.04547925 * 4926; err = 0.32947625 * 4926; time = 0.0540s; samplesPerSecond = 91249.3 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.01249580 * 5120; err = 0.32246094 * 5120; time = 0.0255s; samplesPerSecond = 200414.9 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 0.99796486 * 5120; err = 0.31425781 * 5120; time = 0.0255s; samplesPerSecond = 200784.3 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 0.99781761 * 5120; err = 0.31464844 * 5120; time = 0.0255s; samplesPerSecond = 200517.0 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.00107079 * 5120; err = 0.31855469 * 5120; time = 0.0255s; samplesPerSecond = 200878.8 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 1.02518806 * 5120; err = 0.31972656 * 5120; time = 0.0256s; samplesPerSecond = 200360.0 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.00891876 * 5120; err = 0.31660156 * 5120; time = 0.0256s; samplesPerSecond = 200328.7 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 0.99774780 * 5120; err = 0.30585937 * 5120; time = 0.0255s; samplesPerSecond = 200650.5 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 1.00037842 * 5120; err = 0.30722656 * 5120; time = 0.0255s; samplesPerSecond = 200831.6 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 1.02586746 * 5120; err = 0.31816406 * 5120; time = 0.0255s; samplesPerSecond = 200792.2 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 1.06024628 * 5120; err = 0.33574219 * 5120; time = 0.0255s; samplesPerSecond = 200564.1 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 0.98301010 * 5120; err = 0.30214844 * 5120; time = 0.0255s; samplesPerSecond = 200556.2 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 0.96488800 * 5120; err = 0.30156250 * 5120; time = 0.0256s; samplesPerSecond = 200234.6 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 0.99069977 * 5120; err = 0.31640625 * 5120; time = 0.0255s; samplesPerSecond = 200989.2 +05/03/2016 18:15:23: Epoch[ 4 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 0.97961731 * 5120; err = 0.29921875 * 5120; time = 0.0255s; samplesPerSecond = 200697.7 +05/03/2016 18:15:23: Finished Epoch[ 4 of 4]: [Training] ce = 1.00739784 * 81920; err = 0.31477051 * 81920; totalSamplesSeen = 327680; learningRatePerSample = 0.003125; epochTime=0.442421s +05/03/2016 18:15:23: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech' +05/03/2016 18:15:23: CNTKCommandTrainEnd: speechTrain -05/03/2016 18:17:13: Action "train" complete. +05/03/2016 18:15:23: Action "train" complete. 
-05/03/2016 18:17:13: __COMPLETED__
\ No newline at end of file
+05/03/2016 18:15:23: __COMPLETED__
\ No newline at end of file
diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.windows.cpu.txt
index e4bb7ca03..58efbd3f8 100644
--- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.windows.cpu.txt
@@ -639,76 +639,76 @@ minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data
 requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
 Starting minibatch loop.
- Epoch[ 1 of 2]-Minibatch[ 1- 10 of 320]: * 2560; CrossEntropyWithSoftmax = 3.88299332; EvalErrorPrediction = 0.84648437; TotalTime = 1.48752s; TotalTimePerSample = 0.58106ms; SamplesPerSecond = 1720
- Epoch[ 1 of 2]-Minibatch[ 11- 20 of 320]: * 2560; CrossEntropyWithSoftmax = 2.95486374; EvalErrorPrediction = 0.71289063; TotalTime = 1.04921s; TotalTimePerSample = 0.40985ms; SamplesPerSecond = 2439
- Epoch[ 1 of 2]-Minibatch[ 21- 30 of 320]: * 2560; CrossEntropyWithSoftmax = 2.54855499; EvalErrorPrediction = 0.64804688; TotalTime = 1.26032s; TotalTimePerSample = 0.49231ms; SamplesPerSecond = 2031
- Epoch[ 1 of 2]-Minibatch[ 31- 40 of 320]: * 2560; CrossEntropyWithSoftmax = 2.30297852; EvalErrorPrediction = 0.61679688; TotalTime = 0.80500s; TotalTimePerSample = 0.31445ms; SamplesPerSecond = 3180
- Epoch[ 1 of 2]-Minibatch[ 41- 50 of 320]: * 2560; CrossEntropyWithSoftmax = 2.02965393; EvalErrorPrediction = 0.54843750; TotalTime = 0.78922s; TotalTimePerSample = 0.30829ms; SamplesPerSecond = 3243
- Epoch[ 1 of 2]-Minibatch[ 51- 60 of 320]: * 2560; CrossEntropyWithSoftmax = 1.86639099; EvalErrorPrediction = 0.52187500; TotalTime = 0.80311s; TotalTimePerSample = 0.31371ms; SamplesPerSecond = 3187
- Epoch[ 1 of 2]-Minibatch[ 61- 70 of 320]: * 2560; CrossEntropyWithSoftmax = 1.78586273; EvalErrorPrediction = 0.50976563; TotalTime = 0.77006s; TotalTimePerSample = 0.30081ms; SamplesPerSecond = 3324
- Epoch[ 1 of 2]-Minibatch[ 71- 80 of 320]: * 2560; CrossEntropyWithSoftmax = 1.75383606; EvalErrorPrediction = 0.50507813; TotalTime = 0.79602s; TotalTimePerSample = 0.31095ms; SamplesPerSecond = 3215
- Epoch[ 1 of 2]-Minibatch[ 81- 90 of 320]: * 2560; CrossEntropyWithSoftmax = 1.66867371; EvalErrorPrediction = 0.45625000; TotalTime = 0.78483s; TotalTimePerSample = 0.30658ms; SamplesPerSecond = 3261
- Epoch[ 1 of 2]-Minibatch[ 91- 100 of 320]: * 2560; CrossEntropyWithSoftmax = 1.62280121; EvalErrorPrediction = 0.48710938; TotalTime = 0.80117s; TotalTimePerSample = 0.31296ms; SamplesPerSecond = 3195
- Epoch[ 1 of 2]-Minibatch[ 101- 110 of 320]: * 2560; CrossEntropyWithSoftmax = 1.65261841; EvalErrorPrediction = 0.47695312; TotalTime = 0.79092s; TotalTimePerSample = 0.30895ms; SamplesPerSecond = 3236
- Epoch[ 1 of 2]-Minibatch[ 111- 120 of 320]: * 2560; CrossEntropyWithSoftmax = 1.53025208; EvalErrorPrediction = 0.45976563; TotalTime = 1.09289s; TotalTimePerSample = 0.42691ms; SamplesPerSecond = 2342
- Epoch[ 1 of 2]-Minibatch[ 121- 130 of 320]: * 2560; CrossEntropyWithSoftmax = 1.47788544; EvalErrorPrediction = 0.43945313; TotalTime = 0.78271s; TotalTimePerSample = 0.30575ms; SamplesPerSecond = 3270
- Epoch[ 1 of 2]-Minibatch[ 131- 140 of 320]: * 2560; CrossEntropyWithSoftmax =
1.51797485; EvalErrorPrediction = 0.45195313; TotalTime = 0.79339s; TotalTimePerSample = 0.30992ms; SamplesPerSecond = 3226 - Epoch[ 1 of 2]-Minibatch[ 141- 150 of 320]: * 2560; CrossEntropyWithSoftmax = 1.46555481; EvalErrorPrediction = 0.43203125; TotalTime = 0.83098s; TotalTimePerSample = 0.32460ms; SamplesPerSecond = 3080 - Epoch[ 1 of 2]-Minibatch[ 151- 160 of 320]: * 2560; CrossEntropyWithSoftmax = 1.42054138; EvalErrorPrediction = 0.43320313; TotalTime = 1.14820s; TotalTimePerSample = 0.44851ms; SamplesPerSecond = 2229 - Epoch[ 1 of 2]-Minibatch[ 161- 170 of 320]: * 2560; CrossEntropyWithSoftmax = 1.41380615; EvalErrorPrediction = 0.41875000; TotalTime = 0.81105s; TotalTimePerSample = 0.31682ms; SamplesPerSecond = 3156 - Epoch[ 1 of 2]-Minibatch[ 171- 180 of 320]: * 2560; CrossEntropyWithSoftmax = 1.45621948; EvalErrorPrediction = 0.42695312; TotalTime = 0.78965s; TotalTimePerSample = 0.30846ms; SamplesPerSecond = 3241 - Epoch[ 1 of 2]-Minibatch[ 181- 190 of 320]: * 2560; CrossEntropyWithSoftmax = 1.48666992; EvalErrorPrediction = 0.44218750; TotalTime = 0.77602s; TotalTimePerSample = 0.30313ms; SamplesPerSecond = 3298 - Epoch[ 1 of 2]-Minibatch[ 191- 200 of 320]: * 2560; CrossEntropyWithSoftmax = 1.43567505; EvalErrorPrediction = 0.43710938; TotalTime = 0.77241s; TotalTimePerSample = 0.30172ms; SamplesPerSecond = 3314 - Epoch[ 1 of 2]-Minibatch[ 201- 210 of 320]: * 2560; CrossEntropyWithSoftmax = 1.35690918; EvalErrorPrediction = 0.41562500; TotalTime = 0.80105s; TotalTimePerSample = 0.31291ms; SamplesPerSecond = 3195 - Epoch[ 1 of 2]-Minibatch[ 211- 220 of 320]: * 2560; CrossEntropyWithSoftmax = 1.39897156; EvalErrorPrediction = 0.42539063; TotalTime = 0.78187s; TotalTimePerSample = 0.30542ms; SamplesPerSecond = 3274 - Epoch[ 1 of 2]-Minibatch[ 221- 230 of 320]: * 2560; CrossEntropyWithSoftmax = 1.40048218; EvalErrorPrediction = 0.40937500; TotalTime = 0.76873s; TotalTimePerSample = 0.30029ms; SamplesPerSecond = 3330 - Epoch[ 1 of 2]-Minibatch[ 231- 240 of 320]: * 2560; CrossEntropyWithSoftmax = 1.39494934; EvalErrorPrediction = 0.43046875; TotalTime = 0.77391s; TotalTimePerSample = 0.30231ms; SamplesPerSecond = 3307 - Epoch[ 1 of 2]-Minibatch[ 241- 250 of 320]: * 2560; CrossEntropyWithSoftmax = 1.33256531; EvalErrorPrediction = 0.40312500; TotalTime = 0.81253s; TotalTimePerSample = 0.31740ms; SamplesPerSecond = 3150 - Epoch[ 1 of 2]-Minibatch[ 251- 260 of 320]: * 2560; CrossEntropyWithSoftmax = 1.27213135; EvalErrorPrediction = 0.39687500; TotalTime = 0.82009s; TotalTimePerSample = 0.32035ms; SamplesPerSecond = 3121 - Epoch[ 1 of 2]-Minibatch[ 261- 270 of 320]: * 2560; CrossEntropyWithSoftmax = 1.31734619; EvalErrorPrediction = 0.39804688; TotalTime = 1.00677s; TotalTimePerSample = 0.39327ms; SamplesPerSecond = 2542 - Epoch[ 1 of 2]-Minibatch[ 271- 280 of 320]: * 2560; CrossEntropyWithSoftmax = 1.25228271; EvalErrorPrediction = 0.38437500; TotalTime = 1.24405s; TotalTimePerSample = 0.48596ms; SamplesPerSecond = 2057 - Epoch[ 1 of 2]-Minibatch[ 281- 290 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22771301; EvalErrorPrediction = 0.37343750; TotalTime = 0.81330s; TotalTimePerSample = 0.31770ms; SamplesPerSecond = 3147 - Epoch[ 1 of 2]-Minibatch[ 291- 300 of 320]: * 2560; CrossEntropyWithSoftmax = 1.20522766; EvalErrorPrediction = 0.35976562; TotalTime = 0.80344s; TotalTimePerSample = 0.31384ms; SamplesPerSecond = 3186 - Epoch[ 1 of 2]-Minibatch[ 301- 310 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23264771; EvalErrorPrediction = 0.36718750; TotalTime = 0.81546s; TotalTimePerSample 
= 0.31854ms; SamplesPerSecond = 3139 - Epoch[ 1 of 2]-Minibatch[ 311- 320 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23756104; EvalErrorPrediction = 0.37773438; TotalTime = 0.89764s; TotalTimePerSample = 0.35064ms; SamplesPerSecond = 2851 -Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.6532061; EvalErrorPrediction = 0.46914062; learningRatePerSample = 0.003125000047; EpochTime=32.141459 + Epoch[ 1 of 2]-Minibatch[ 1- 10 of 320]: * 2560; ce = 3.88299332; err = 0.84648437; TotalTime = 1.48752s; TotalTimePerSample = 0.58106ms; SamplesPerSecond = 1720 + Epoch[ 1 of 2]-Minibatch[ 11- 20 of 320]: * 2560; ce = 2.95486374; err = 0.71289063; TotalTime = 1.04921s; TotalTimePerSample = 0.40985ms; SamplesPerSecond = 2439 + Epoch[ 1 of 2]-Minibatch[ 21- 30 of 320]: * 2560; ce = 2.54855499; err = 0.64804688; TotalTime = 1.26032s; TotalTimePerSample = 0.49231ms; SamplesPerSecond = 2031 + Epoch[ 1 of 2]-Minibatch[ 31- 40 of 320]: * 2560; ce = 2.30297852; err = 0.61679688; TotalTime = 0.80500s; TotalTimePerSample = 0.31445ms; SamplesPerSecond = 3180 + Epoch[ 1 of 2]-Minibatch[ 41- 50 of 320]: * 2560; ce = 2.02965393; err = 0.54843750; TotalTime = 0.78922s; TotalTimePerSample = 0.30829ms; SamplesPerSecond = 3243 + Epoch[ 1 of 2]-Minibatch[ 51- 60 of 320]: * 2560; ce = 1.86639099; err = 0.52187500; TotalTime = 0.80311s; TotalTimePerSample = 0.31371ms; SamplesPerSecond = 3187 + Epoch[ 1 of 2]-Minibatch[ 61- 70 of 320]: * 2560; ce = 1.78586273; err = 0.50976563; TotalTime = 0.77006s; TotalTimePerSample = 0.30081ms; SamplesPerSecond = 3324 + Epoch[ 1 of 2]-Minibatch[ 71- 80 of 320]: * 2560; ce = 1.75383606; err = 0.50507813; TotalTime = 0.79602s; TotalTimePerSample = 0.31095ms; SamplesPerSecond = 3215 + Epoch[ 1 of 2]-Minibatch[ 81- 90 of 320]: * 2560; ce = 1.66867371; err = 0.45625000; TotalTime = 0.78483s; TotalTimePerSample = 0.30658ms; SamplesPerSecond = 3261 + Epoch[ 1 of 2]-Minibatch[ 91- 100 of 320]: * 2560; ce = 1.62280121; err = 0.48710938; TotalTime = 0.80117s; TotalTimePerSample = 0.31296ms; SamplesPerSecond = 3195 + Epoch[ 1 of 2]-Minibatch[ 101- 110 of 320]: * 2560; ce = 1.65261841; err = 0.47695312; TotalTime = 0.79092s; TotalTimePerSample = 0.30895ms; SamplesPerSecond = 3236 + Epoch[ 1 of 2]-Minibatch[ 111- 120 of 320]: * 2560; ce = 1.53025208; err = 0.45976563; TotalTime = 1.09289s; TotalTimePerSample = 0.42691ms; SamplesPerSecond = 2342 + Epoch[ 1 of 2]-Minibatch[ 121- 130 of 320]: * 2560; ce = 1.47788544; err = 0.43945313; TotalTime = 0.78271s; TotalTimePerSample = 0.30575ms; SamplesPerSecond = 3270 + Epoch[ 1 of 2]-Minibatch[ 131- 140 of 320]: * 2560; ce = 1.51797485; err = 0.45195313; TotalTime = 0.79339s; TotalTimePerSample = 0.30992ms; SamplesPerSecond = 3226 + Epoch[ 1 of 2]-Minibatch[ 141- 150 of 320]: * 2560; ce = 1.46555481; err = 0.43203125; TotalTime = 0.83098s; TotalTimePerSample = 0.32460ms; SamplesPerSecond = 3080 + Epoch[ 1 of 2]-Minibatch[ 151- 160 of 320]: * 2560; ce = 1.42054138; err = 0.43320313; TotalTime = 1.14820s; TotalTimePerSample = 0.44851ms; SamplesPerSecond = 2229 + Epoch[ 1 of 2]-Minibatch[ 161- 170 of 320]: * 2560; ce = 1.41380615; err = 0.41875000; TotalTime = 0.81105s; TotalTimePerSample = 0.31682ms; SamplesPerSecond = 3156 + Epoch[ 1 of 2]-Minibatch[ 171- 180 of 320]: * 2560; ce = 1.45621948; err = 0.42695312; TotalTime = 0.78965s; TotalTimePerSample = 0.30846ms; SamplesPerSecond = 3241 + Epoch[ 1 of 2]-Minibatch[ 181- 190 of 320]: * 2560; ce = 1.48666992; err = 0.44218750; TotalTime = 0.77602s; TotalTimePerSample = 0.30313ms; SamplesPerSecond 
= 3298 + Epoch[ 1 of 2]-Minibatch[ 191- 200 of 320]: * 2560; ce = 1.43567505; err = 0.43710938; TotalTime = 0.77241s; TotalTimePerSample = 0.30172ms; SamplesPerSecond = 3314 + Epoch[ 1 of 2]-Minibatch[ 201- 210 of 320]: * 2560; ce = 1.35690918; err = 0.41562500; TotalTime = 0.80105s; TotalTimePerSample = 0.31291ms; SamplesPerSecond = 3195 + Epoch[ 1 of 2]-Minibatch[ 211- 220 of 320]: * 2560; ce = 1.39897156; err = 0.42539063; TotalTime = 0.78187s; TotalTimePerSample = 0.30542ms; SamplesPerSecond = 3274 + Epoch[ 1 of 2]-Minibatch[ 221- 230 of 320]: * 2560; ce = 1.40048218; err = 0.40937500; TotalTime = 0.76873s; TotalTimePerSample = 0.30029ms; SamplesPerSecond = 3330 + Epoch[ 1 of 2]-Minibatch[ 231- 240 of 320]: * 2560; ce = 1.39494934; err = 0.43046875; TotalTime = 0.77391s; TotalTimePerSample = 0.30231ms; SamplesPerSecond = 3307 + Epoch[ 1 of 2]-Minibatch[ 241- 250 of 320]: * 2560; ce = 1.33256531; err = 0.40312500; TotalTime = 0.81253s; TotalTimePerSample = 0.31740ms; SamplesPerSecond = 3150 + Epoch[ 1 of 2]-Minibatch[ 251- 260 of 320]: * 2560; ce = 1.27213135; err = 0.39687500; TotalTime = 0.82009s; TotalTimePerSample = 0.32035ms; SamplesPerSecond = 3121 + Epoch[ 1 of 2]-Minibatch[ 261- 270 of 320]: * 2560; ce = 1.31734619; err = 0.39804688; TotalTime = 1.00677s; TotalTimePerSample = 0.39327ms; SamplesPerSecond = 2542 + Epoch[ 1 of 2]-Minibatch[ 271- 280 of 320]: * 2560; ce = 1.25228271; err = 0.38437500; TotalTime = 1.24405s; TotalTimePerSample = 0.48596ms; SamplesPerSecond = 2057 + Epoch[ 1 of 2]-Minibatch[ 281- 290 of 320]: * 2560; ce = 1.22771301; err = 0.37343750; TotalTime = 0.81330s; TotalTimePerSample = 0.31770ms; SamplesPerSecond = 3147 + Epoch[ 1 of 2]-Minibatch[ 291- 300 of 320]: * 2560; ce = 1.20522766; err = 0.35976562; TotalTime = 0.80344s; TotalTimePerSample = 0.31384ms; SamplesPerSecond = 3186 + Epoch[ 1 of 2]-Minibatch[ 301- 310 of 320]: * 2560; ce = 1.23264771; err = 0.36718750; TotalTime = 0.81546s; TotalTimePerSample = 0.31854ms; SamplesPerSecond = 3139 + Epoch[ 1 of 2]-Minibatch[ 311- 320 of 320]: * 2560; ce = 1.23756104; err = 0.37773438; TotalTime = 0.89764s; TotalTimePerSample = 0.35064ms; SamplesPerSecond = 2851 +Finished Epoch[ 1 of 2]: [Training] ce = 1.6532061; err = 0.46914062; learningRatePerSample = 0.003125000047; EpochTime=32.141459 Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses Starting minibatch loop. 
- Epoch[ 2 of 2]-Minibatch[ 1- 10 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23025007; EvalErrorPrediction = 0.37500000; TotalTime = 0.93409s; TotalTimePerSample = 0.36488ms; SamplesPerSecond = 2740 - Epoch[ 2 of 2]-Minibatch[ 11- 20 of 320]: * 2560; CrossEntropyWithSoftmax = 1.19630919; EvalErrorPrediction = 0.37031250; TotalTime = 0.82552s; TotalTimePerSample = 0.32247ms; SamplesPerSecond = 3101 - Epoch[ 2 of 2]-Minibatch[ 21- 30 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17490788; EvalErrorPrediction = 0.35585937; TotalTime = 0.79413s; TotalTimePerSample = 0.31021ms; SamplesPerSecond = 3223 - Epoch[ 2 of 2]-Minibatch[ 31- 40 of 320]: * 2560; CrossEntropyWithSoftmax = 1.20401611; EvalErrorPrediction = 0.36210938; TotalTime = 0.81917s; TotalTimePerSample = 0.31999ms; SamplesPerSecond = 3125 - Epoch[ 2 of 2]-Minibatch[ 41- 50 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18899193; EvalErrorPrediction = 0.38164063; TotalTime = 0.84552s; TotalTimePerSample = 0.33028ms; SamplesPerSecond = 3027 - Epoch[ 2 of 2]-Minibatch[ 51- 60 of 320]: * 2560; CrossEntropyWithSoftmax = 1.16435585; EvalErrorPrediction = 0.34492187; TotalTime = 0.92039s; TotalTimePerSample = 0.35953ms; SamplesPerSecond = 2781 - Epoch[ 2 of 2]-Minibatch[ 61- 70 of 320]: * 2560; CrossEntropyWithSoftmax = 1.13413086; EvalErrorPrediction = 0.34570313; TotalTime = 0.78939s; TotalTimePerSample = 0.30836ms; SamplesPerSecond = 3243 - Epoch[ 2 of 2]-Minibatch[ 71- 80 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18822556; EvalErrorPrediction = 0.35820313; TotalTime = 1.28111s; TotalTimePerSample = 0.50043ms; SamplesPerSecond = 1998 - Epoch[ 2 of 2]-Minibatch[ 81- 90 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23435211; EvalErrorPrediction = 0.37265625; TotalTime = 0.84101s; TotalTimePerSample = 0.32852ms; SamplesPerSecond = 3043 - Epoch[ 2 of 2]-Minibatch[ 91- 100 of 320]: * 2560; CrossEntropyWithSoftmax = 1.19776535; EvalErrorPrediction = 0.36406250; TotalTime = 0.79750s; TotalTimePerSample = 0.31153ms; SamplesPerSecond = 3210 - Epoch[ 2 of 2]-Minibatch[ 101- 110 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17883530; EvalErrorPrediction = 0.36601563; TotalTime = 0.78897s; TotalTimePerSample = 0.30819ms; SamplesPerSecond = 3244 - Epoch[ 2 of 2]-Minibatch[ 111- 120 of 320]: * 2560; CrossEntropyWithSoftmax = 1.26480560; EvalErrorPrediction = 0.39179687; TotalTime = 0.80044s; TotalTimePerSample = 0.31267ms; SamplesPerSecond = 3198 - Epoch[ 2 of 2]-Minibatch[ 121- 130 of 320]: * 2560; CrossEntropyWithSoftmax = 1.19315338; EvalErrorPrediction = 0.36406250; TotalTime = 0.78345s; TotalTimePerSample = 0.30604ms; SamplesPerSecond = 3267 - Epoch[ 2 of 2]-Minibatch[ 131- 140 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22714691; EvalErrorPrediction = 0.38046875; TotalTime = 0.83964s; TotalTimePerSample = 0.32798ms; SamplesPerSecond = 3048 - Epoch[ 2 of 2]-Minibatch[ 141- 150 of 320]: * 2560; CrossEntropyWithSoftmax = 1.21824493; EvalErrorPrediction = 0.37929687; TotalTime = 0.78280s; TotalTimePerSample = 0.30578ms; SamplesPerSecond = 3270 - Epoch[ 2 of 2]-Minibatch[ 151- 160 of 320]: * 2560; CrossEntropyWithSoftmax = 1.16403503; EvalErrorPrediction = 0.35742188; TotalTime = 0.77936s; TotalTimePerSample = 0.30444ms; SamplesPerSecond = 3284 - Epoch[ 2 of 2]-Minibatch[ 161- 170 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14682007; EvalErrorPrediction = 0.35273437; TotalTime = 0.80358s; TotalTimePerSample = 0.31390ms; SamplesPerSecond = 3185 - Epoch[ 2 of 2]-Minibatch[ 171- 180 of 320]: * 2560; CrossEntropyWithSoftmax = 1.16286163; 
EvalErrorPrediction = 0.34296875; TotalTime = 0.80497s; TotalTimePerSample = 0.31444ms; SamplesPerSecond = 3180 - Epoch[ 2 of 2]-Minibatch[ 181- 190 of 320]: * 2560; CrossEntropyWithSoftmax = 1.15572357; EvalErrorPrediction = 0.35742188; TotalTime = 0.82832s; TotalTimePerSample = 0.32356ms; SamplesPerSecond = 3090 - Epoch[ 2 of 2]-Minibatch[ 191- 200 of 320]: * 2560; CrossEntropyWithSoftmax = 1.09647369; EvalErrorPrediction = 0.33515625; TotalTime = 1.20946s; TotalTimePerSample = 0.47245ms; SamplesPerSecond = 2116 - Epoch[ 2 of 2]-Minibatch[ 201- 210 of 320]: * 2560; CrossEntropyWithSoftmax = 1.15619965; EvalErrorPrediction = 0.35625000; TotalTime = 0.84213s; TotalTimePerSample = 0.32896ms; SamplesPerSecond = 3039 - Epoch[ 2 of 2]-Minibatch[ 211- 220 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18662567; EvalErrorPrediction = 0.36171875; TotalTime = 0.79636s; TotalTimePerSample = 0.31108ms; SamplesPerSecond = 3214 - Epoch[ 2 of 2]-Minibatch[ 221- 230 of 320]: * 2560; CrossEntropyWithSoftmax = 1.19353638; EvalErrorPrediction = 0.37578125; TotalTime = 0.79557s; TotalTimePerSample = 0.31077ms; SamplesPerSecond = 3217 - Epoch[ 2 of 2]-Minibatch[ 231- 240 of 320]: * 2560; CrossEntropyWithSoftmax = 1.15247192; EvalErrorPrediction = 0.34726563; TotalTime = 0.78307s; TotalTimePerSample = 0.30589ms; SamplesPerSecond = 3269 - Epoch[ 2 of 2]-Minibatch[ 241- 250 of 320]: * 2560; CrossEntropyWithSoftmax = 1.15059509; EvalErrorPrediction = 0.35117188; TotalTime = 0.78616s; TotalTimePerSample = 0.30709ms; SamplesPerSecond = 3256 - Epoch[ 2 of 2]-Minibatch[ 251- 260 of 320]: * 2560; CrossEntropyWithSoftmax = 1.07452087; EvalErrorPrediction = 0.32890625; TotalTime = 0.81104s; TotalTimePerSample = 0.31681ms; SamplesPerSecond = 3156 - Epoch[ 2 of 2]-Minibatch[ 261- 270 of 320]: * 2560; CrossEntropyWithSoftmax = 1.10093384; EvalErrorPrediction = 0.34609375; TotalTime = 0.82247s; TotalTimePerSample = 0.32128ms; SamplesPerSecond = 3112 - Epoch[ 2 of 2]-Minibatch[ 271- 280 of 320]: * 2560; CrossEntropyWithSoftmax = 1.06968994; EvalErrorPrediction = 0.33164063; TotalTime = 0.80968s; TotalTimePerSample = 0.31628ms; SamplesPerSecond = 3161 - Epoch[ 2 of 2]-Minibatch[ 281- 290 of 320]: * 2560; CrossEntropyWithSoftmax = 1.11170044; EvalErrorPrediction = 0.34296875; TotalTime = 0.80222s; TotalTimePerSample = 0.31337ms; SamplesPerSecond = 3191 - Epoch[ 2 of 2]-Minibatch[ 291- 300 of 320]: * 2560; CrossEntropyWithSoftmax = 1.15401306; EvalErrorPrediction = 0.35585937; TotalTime = 0.96892s; TotalTimePerSample = 0.37848ms; SamplesPerSecond = 2642 - Epoch[ 2 of 2]-Minibatch[ 301- 310 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12380981; EvalErrorPrediction = 0.35234375; TotalTime = 1.05247s; TotalTimePerSample = 0.41112ms; SamplesPerSecond = 2432 - Epoch[ 2 of 2]-Minibatch[ 311- 320 of 320]: * 2560; CrossEntropyWithSoftmax = 1.07885132; EvalErrorPrediction = 0.32148437; TotalTime = 1.29900s; TotalTimePerSample = 0.50742ms; SamplesPerSecond = 1970 -Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.1648235; EvalErrorPrediction = 0.35716555; learningRatePerSample = 0.003125000047; EpochTime=27.866186 + Epoch[ 2 of 2]-Minibatch[ 1- 10 of 320]: * 2560; ce = 1.23025007; err = 0.37500000; TotalTime = 0.93409s; TotalTimePerSample = 0.36488ms; SamplesPerSecond = 2740 + Epoch[ 2 of 2]-Minibatch[ 11- 20 of 320]: * 2560; ce = 1.19630919; err = 0.37031250; TotalTime = 0.82552s; TotalTimePerSample = 0.32247ms; SamplesPerSecond = 3101 + Epoch[ 2 of 2]-Minibatch[ 21- 30 of 320]: * 2560; ce = 1.17490788; err = 0.35585937; 
TotalTime = 0.79413s; TotalTimePerSample = 0.31021ms; SamplesPerSecond = 3223 + Epoch[ 2 of 2]-Minibatch[ 31- 40 of 320]: * 2560; ce = 1.20401611; err = 0.36210938; TotalTime = 0.81917s; TotalTimePerSample = 0.31999ms; SamplesPerSecond = 3125 + Epoch[ 2 of 2]-Minibatch[ 41- 50 of 320]: * 2560; ce = 1.18899193; err = 0.38164063; TotalTime = 0.84552s; TotalTimePerSample = 0.33028ms; SamplesPerSecond = 3027 + Epoch[ 2 of 2]-Minibatch[ 51- 60 of 320]: * 2560; ce = 1.16435585; err = 0.34492187; TotalTime = 0.92039s; TotalTimePerSample = 0.35953ms; SamplesPerSecond = 2781 + Epoch[ 2 of 2]-Minibatch[ 61- 70 of 320]: * 2560; ce = 1.13413086; err = 0.34570313; TotalTime = 0.78939s; TotalTimePerSample = 0.30836ms; SamplesPerSecond = 3243 + Epoch[ 2 of 2]-Minibatch[ 71- 80 of 320]: * 2560; ce = 1.18822556; err = 0.35820313; TotalTime = 1.28111s; TotalTimePerSample = 0.50043ms; SamplesPerSecond = 1998 + Epoch[ 2 of 2]-Minibatch[ 81- 90 of 320]: * 2560; ce = 1.23435211; err = 0.37265625; TotalTime = 0.84101s; TotalTimePerSample = 0.32852ms; SamplesPerSecond = 3043 + Epoch[ 2 of 2]-Minibatch[ 91- 100 of 320]: * 2560; ce = 1.19776535; err = 0.36406250; TotalTime = 0.79750s; TotalTimePerSample = 0.31153ms; SamplesPerSecond = 3210 + Epoch[ 2 of 2]-Minibatch[ 101- 110 of 320]: * 2560; ce = 1.17883530; err = 0.36601563; TotalTime = 0.78897s; TotalTimePerSample = 0.30819ms; SamplesPerSecond = 3244 + Epoch[ 2 of 2]-Minibatch[ 111- 120 of 320]: * 2560; ce = 1.26480560; err = 0.39179687; TotalTime = 0.80044s; TotalTimePerSample = 0.31267ms; SamplesPerSecond = 3198 + Epoch[ 2 of 2]-Minibatch[ 121- 130 of 320]: * 2560; ce = 1.19315338; err = 0.36406250; TotalTime = 0.78345s; TotalTimePerSample = 0.30604ms; SamplesPerSecond = 3267 + Epoch[ 2 of 2]-Minibatch[ 131- 140 of 320]: * 2560; ce = 1.22714691; err = 0.38046875; TotalTime = 0.83964s; TotalTimePerSample = 0.32798ms; SamplesPerSecond = 3048 + Epoch[ 2 of 2]-Minibatch[ 141- 150 of 320]: * 2560; ce = 1.21824493; err = 0.37929687; TotalTime = 0.78280s; TotalTimePerSample = 0.30578ms; SamplesPerSecond = 3270 + Epoch[ 2 of 2]-Minibatch[ 151- 160 of 320]: * 2560; ce = 1.16403503; err = 0.35742188; TotalTime = 0.77936s; TotalTimePerSample = 0.30444ms; SamplesPerSecond = 3284 + Epoch[ 2 of 2]-Minibatch[ 161- 170 of 320]: * 2560; ce = 1.14682007; err = 0.35273437; TotalTime = 0.80358s; TotalTimePerSample = 0.31390ms; SamplesPerSecond = 3185 + Epoch[ 2 of 2]-Minibatch[ 171- 180 of 320]: * 2560; ce = 1.16286163; err = 0.34296875; TotalTime = 0.80497s; TotalTimePerSample = 0.31444ms; SamplesPerSecond = 3180 + Epoch[ 2 of 2]-Minibatch[ 181- 190 of 320]: * 2560; ce = 1.15572357; err = 0.35742188; TotalTime = 0.82832s; TotalTimePerSample = 0.32356ms; SamplesPerSecond = 3090 + Epoch[ 2 of 2]-Minibatch[ 191- 200 of 320]: * 2560; ce = 1.09647369; err = 0.33515625; TotalTime = 1.20946s; TotalTimePerSample = 0.47245ms; SamplesPerSecond = 2116 + Epoch[ 2 of 2]-Minibatch[ 201- 210 of 320]: * 2560; ce = 1.15619965; err = 0.35625000; TotalTime = 0.84213s; TotalTimePerSample = 0.32896ms; SamplesPerSecond = 3039 + Epoch[ 2 of 2]-Minibatch[ 211- 220 of 320]: * 2560; ce = 1.18662567; err = 0.36171875; TotalTime = 0.79636s; TotalTimePerSample = 0.31108ms; SamplesPerSecond = 3214 + Epoch[ 2 of 2]-Minibatch[ 221- 230 of 320]: * 2560; ce = 1.19353638; err = 0.37578125; TotalTime = 0.79557s; TotalTimePerSample = 0.31077ms; SamplesPerSecond = 3217 + Epoch[ 2 of 2]-Minibatch[ 231- 240 of 320]: * 2560; ce = 1.15247192; err = 0.34726563; TotalTime = 0.78307s; TotalTimePerSample = 0.30589ms; 
SamplesPerSecond = 3269 + Epoch[ 2 of 2]-Minibatch[ 241- 250 of 320]: * 2560; ce = 1.15059509; err = 0.35117188; TotalTime = 0.78616s; TotalTimePerSample = 0.30709ms; SamplesPerSecond = 3256 + Epoch[ 2 of 2]-Minibatch[ 251- 260 of 320]: * 2560; ce = 1.07452087; err = 0.32890625; TotalTime = 0.81104s; TotalTimePerSample = 0.31681ms; SamplesPerSecond = 3156 + Epoch[ 2 of 2]-Minibatch[ 261- 270 of 320]: * 2560; ce = 1.10093384; err = 0.34609375; TotalTime = 0.82247s; TotalTimePerSample = 0.32128ms; SamplesPerSecond = 3112 + Epoch[ 2 of 2]-Minibatch[ 271- 280 of 320]: * 2560; ce = 1.06968994; err = 0.33164063; TotalTime = 0.80968s; TotalTimePerSample = 0.31628ms; SamplesPerSecond = 3161 + Epoch[ 2 of 2]-Minibatch[ 281- 290 of 320]: * 2560; ce = 1.11170044; err = 0.34296875; TotalTime = 0.80222s; TotalTimePerSample = 0.31337ms; SamplesPerSecond = 3191 + Epoch[ 2 of 2]-Minibatch[ 291- 300 of 320]: * 2560; ce = 1.15401306; err = 0.35585937; TotalTime = 0.96892s; TotalTimePerSample = 0.37848ms; SamplesPerSecond = 2642 + Epoch[ 2 of 2]-Minibatch[ 301- 310 of 320]: * 2560; ce = 1.12380981; err = 0.35234375; TotalTime = 1.05247s; TotalTimePerSample = 0.41112ms; SamplesPerSecond = 2432 + Epoch[ 2 of 2]-Minibatch[ 311- 320 of 320]: * 2560; ce = 1.07885132; err = 0.32148437; TotalTime = 1.29900s; TotalTimePerSample = 0.50742ms; SamplesPerSecond = 1970 +Finished Epoch[ 2 of 2]: [Training] ce = 1.1648235; err = 0.35716555; learningRatePerSample = 0.003125000047; EpochTime=27.866186 CNTKCommandTrainEnd: DPT_Pre1 @@ -1793,76 +1793,76 @@ minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms Starting minibatch loop. - Epoch[ 1 of 2]-Minibatch[ 1- 10 of 320]: * 2560; CrossEntropyWithSoftmax = 4.54558067; EvalErrorPrediction = 0.80625000; TotalTime = 1.47303s; TotalTimePerSample = 0.57540ms; SamplesPerSecond = 1737 - Epoch[ 1 of 2]-Minibatch[ 11- 20 of 320]: * 2560; CrossEntropyWithSoftmax = 2.84298668; EvalErrorPrediction = 0.68515625; TotalTime = 1.40407s; TotalTimePerSample = 0.54846ms; SamplesPerSecond = 1823 - Epoch[ 1 of 2]-Minibatch[ 21- 30 of 320]: * 2560; CrossEntropyWithSoftmax = 2.27536545; EvalErrorPrediction = 0.60273438; TotalTime = 1.75662s; TotalTimePerSample = 0.68618ms; SamplesPerSecond = 1457 - Epoch[ 1 of 2]-Minibatch[ 31- 40 of 320]: * 2560; CrossEntropyWithSoftmax = 1.91519928; EvalErrorPrediction = 0.51406250; TotalTime = 1.35283s; TotalTimePerSample = 0.52845ms; SamplesPerSecond = 1892 - Epoch[ 1 of 2]-Minibatch[ 41- 50 of 320]: * 2560; CrossEntropyWithSoftmax = 1.68722076; EvalErrorPrediction = 0.46601562; TotalTime = 1.36410s; TotalTimePerSample = 0.53285ms; SamplesPerSecond = 1876 - Epoch[ 1 of 2]-Minibatch[ 51- 60 of 320]: * 2560; CrossEntropyWithSoftmax = 1.58424530; EvalErrorPrediction = 0.45742187; TotalTime = 1.36985s; TotalTimePerSample = 0.53510ms; SamplesPerSecond = 1868 - Epoch[ 1 of 2]-Minibatch[ 61- 70 of 320]: * 2560; CrossEntropyWithSoftmax = 1.49036713; EvalErrorPrediction = 0.43554688; TotalTime = 1.55377s; TotalTimePerSample = 0.60694ms; SamplesPerSecond = 1647 - Epoch[ 1 of 2]-Minibatch[ 71- 80 of 320]: * 2560; CrossEntropyWithSoftmax = 1.48389893; EvalErrorPrediction = 0.43085937; TotalTime = 1.39856s; TotalTimePerSample = 0.54631ms; SamplesPerSecond = 1830 - Epoch[ 1 of 2]-Minibatch[ 81- 90 of 320]: * 2560; CrossEntropyWithSoftmax = 1.45019989; EvalErrorPrediction = 0.41250000; TotalTime = 1.40718s; TotalTimePerSample = 0.54968ms; SamplesPerSecond = 1819 
- Epoch[ 1 of 2]-Minibatch[ 91- 100 of 320]: * 2560; CrossEntropyWithSoftmax = 1.41547852; EvalErrorPrediction = 0.40351562; TotalTime = 1.89277s; TotalTimePerSample = 0.73936ms; SamplesPerSecond = 1352 - Epoch[ 1 of 2]-Minibatch[ 101- 110 of 320]: * 2560; CrossEntropyWithSoftmax = 1.41315918; EvalErrorPrediction = 0.40742187; TotalTime = 1.39634s; TotalTimePerSample = 0.54545ms; SamplesPerSecond = 1833 - Epoch[ 1 of 2]-Minibatch[ 111- 120 of 320]: * 2560; CrossEntropyWithSoftmax = 1.33824921; EvalErrorPrediction = 0.39179687; TotalTime = 1.57873s; TotalTimePerSample = 0.61669ms; SamplesPerSecond = 1621 - Epoch[ 1 of 2]-Minibatch[ 121- 130 of 320]: * 2560; CrossEntropyWithSoftmax = 1.31290741; EvalErrorPrediction = 0.38593750; TotalTime = 1.36941s; TotalTimePerSample = 0.53493ms; SamplesPerSecond = 1869 - Epoch[ 1 of 2]-Minibatch[ 131- 140 of 320]: * 2560; CrossEntropyWithSoftmax = 1.33027344; EvalErrorPrediction = 0.40039063; TotalTime = 1.35049s; TotalTimePerSample = 0.52753ms; SamplesPerSecond = 1895 - Epoch[ 1 of 2]-Minibatch[ 141- 150 of 320]: * 2560; CrossEntropyWithSoftmax = 1.31422729; EvalErrorPrediction = 0.38632813; TotalTime = 1.35598s; TotalTimePerSample = 0.52968ms; SamplesPerSecond = 1887 - Epoch[ 1 of 2]-Minibatch[ 151- 160 of 320]: * 2560; CrossEntropyWithSoftmax = 1.26712646; EvalErrorPrediction = 0.38710937; TotalTime = 1.39183s; TotalTimePerSample = 0.54368ms; SamplesPerSecond = 1839 - Epoch[ 1 of 2]-Minibatch[ 161- 170 of 320]: * 2560; CrossEntropyWithSoftmax = 1.28440247; EvalErrorPrediction = 0.38242188; TotalTime = 1.69816s; TotalTimePerSample = 0.66334ms; SamplesPerSecond = 1507 - Epoch[ 1 of 2]-Minibatch[ 171- 180 of 320]: * 2560; CrossEntropyWithSoftmax = 1.31289368; EvalErrorPrediction = 0.39648438; TotalTime = 1.39519s; TotalTimePerSample = 0.54500ms; SamplesPerSecond = 1834 - Epoch[ 1 of 2]-Minibatch[ 181- 190 of 320]: * 2560; CrossEntropyWithSoftmax = 1.33919067; EvalErrorPrediction = 0.41250000; TotalTime = 1.39722s; TotalTimePerSample = 0.54579ms; SamplesPerSecond = 1832 - Epoch[ 1 of 2]-Minibatch[ 191- 200 of 320]: * 2560; CrossEntropyWithSoftmax = 1.31965942; EvalErrorPrediction = 0.41757813; TotalTime = 1.65563s; TotalTimePerSample = 0.64673ms; SamplesPerSecond = 1546 - Epoch[ 1 of 2]-Minibatch[ 201- 210 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23342590; EvalErrorPrediction = 0.37578125; TotalTime = 1.64381s; TotalTimePerSample = 0.64211ms; SamplesPerSecond = 1557 - Epoch[ 1 of 2]-Minibatch[ 211- 220 of 320]: * 2560; CrossEntropyWithSoftmax = 1.26905212; EvalErrorPrediction = 0.38671875; TotalTime = 1.40643s; TotalTimePerSample = 0.54938ms; SamplesPerSecond = 1820 - Epoch[ 1 of 2]-Minibatch[ 221- 230 of 320]: * 2560; CrossEntropyWithSoftmax = 1.27187195; EvalErrorPrediction = 0.37109375; TotalTime = 1.33750s; TotalTimePerSample = 0.52246ms; SamplesPerSecond = 1914 - Epoch[ 1 of 2]-Minibatch[ 231- 240 of 320]: * 2560; CrossEntropyWithSoftmax = 1.24517822; EvalErrorPrediction = 0.37382813; TotalTime = 1.70417s; TotalTimePerSample = 0.66569ms; SamplesPerSecond = 1502 - Epoch[ 1 of 2]-Minibatch[ 241- 250 of 320]: * 2560; CrossEntropyWithSoftmax = 1.21053467; EvalErrorPrediction = 0.37109375; TotalTime = 1.36920s; TotalTimePerSample = 0.53484ms; SamplesPerSecond = 1869 - Epoch[ 1 of 2]-Minibatch[ 251- 260 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18406067; EvalErrorPrediction = 0.36757812; TotalTime = 1.38623s; TotalTimePerSample = 0.54150ms; SamplesPerSecond = 1846 - Epoch[ 1 of 2]-Minibatch[ 261- 270 of 320]: * 2560; CrossEntropyWithSoftmax = 
1.23108521; EvalErrorPrediction = 0.36718750; TotalTime = 1.33867s; TotalTimePerSample = 0.52292ms; SamplesPerSecond = 1912 - Epoch[ 1 of 2]-Minibatch[ 271- 280 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18462524; EvalErrorPrediction = 0.36093750; TotalTime = 1.37471s; TotalTimePerSample = 0.53700ms; SamplesPerSecond = 1862 - Epoch[ 1 of 2]-Minibatch[ 281- 290 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17063904; EvalErrorPrediction = 0.35781250; TotalTime = 1.38296s; TotalTimePerSample = 0.54022ms; SamplesPerSecond = 1851 - Epoch[ 1 of 2]-Minibatch[ 291- 300 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14370728; EvalErrorPrediction = 0.34335938; TotalTime = 1.42403s; TotalTimePerSample = 0.55626ms; SamplesPerSecond = 1797 - Epoch[ 1 of 2]-Minibatch[ 301- 310 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17314453; EvalErrorPrediction = 0.34921875; TotalTime = 1.37369s; TotalTimePerSample = 0.53660ms; SamplesPerSecond = 1863 - Epoch[ 1 of 2]-Minibatch[ 311- 320 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18715210; EvalErrorPrediction = 0.36328125; TotalTime = 1.80418s; TotalTimePerSample = 0.70476ms; SamplesPerSecond = 1418 -Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.5133471; EvalErrorPrediction = 0.42406008; learningRatePerSample = 0.003125000047; EpochTime=49.952198 + Epoch[ 1 of 2]-Minibatch[ 1- 10 of 320]: * 2560; ce = 4.54558067; err = 0.80625000; TotalTime = 1.47303s; TotalTimePerSample = 0.57540ms; SamplesPerSecond = 1737 + Epoch[ 1 of 2]-Minibatch[ 11- 20 of 320]: * 2560; ce = 2.84298668; err = 0.68515625; TotalTime = 1.40407s; TotalTimePerSample = 0.54846ms; SamplesPerSecond = 1823 + Epoch[ 1 of 2]-Minibatch[ 21- 30 of 320]: * 2560; ce = 2.27536545; err = 0.60273438; TotalTime = 1.75662s; TotalTimePerSample = 0.68618ms; SamplesPerSecond = 1457 + Epoch[ 1 of 2]-Minibatch[ 31- 40 of 320]: * 2560; ce = 1.91519928; err = 0.51406250; TotalTime = 1.35283s; TotalTimePerSample = 0.52845ms; SamplesPerSecond = 1892 + Epoch[ 1 of 2]-Minibatch[ 41- 50 of 320]: * 2560; ce = 1.68722076; err = 0.46601562; TotalTime = 1.36410s; TotalTimePerSample = 0.53285ms; SamplesPerSecond = 1876 + Epoch[ 1 of 2]-Minibatch[ 51- 60 of 320]: * 2560; ce = 1.58424530; err = 0.45742187; TotalTime = 1.36985s; TotalTimePerSample = 0.53510ms; SamplesPerSecond = 1868 + Epoch[ 1 of 2]-Minibatch[ 61- 70 of 320]: * 2560; ce = 1.49036713; err = 0.43554688; TotalTime = 1.55377s; TotalTimePerSample = 0.60694ms; SamplesPerSecond = 1647 + Epoch[ 1 of 2]-Minibatch[ 71- 80 of 320]: * 2560; ce = 1.48389893; err = 0.43085937; TotalTime = 1.39856s; TotalTimePerSample = 0.54631ms; SamplesPerSecond = 1830 + Epoch[ 1 of 2]-Minibatch[ 81- 90 of 320]: * 2560; ce = 1.45019989; err = 0.41250000; TotalTime = 1.40718s; TotalTimePerSample = 0.54968ms; SamplesPerSecond = 1819 + Epoch[ 1 of 2]-Minibatch[ 91- 100 of 320]: * 2560; ce = 1.41547852; err = 0.40351562; TotalTime = 1.89277s; TotalTimePerSample = 0.73936ms; SamplesPerSecond = 1352 + Epoch[ 1 of 2]-Minibatch[ 101- 110 of 320]: * 2560; ce = 1.41315918; err = 0.40742187; TotalTime = 1.39634s; TotalTimePerSample = 0.54545ms; SamplesPerSecond = 1833 + Epoch[ 1 of 2]-Minibatch[ 111- 120 of 320]: * 2560; ce = 1.33824921; err = 0.39179687; TotalTime = 1.57873s; TotalTimePerSample = 0.61669ms; SamplesPerSecond = 1621 + Epoch[ 1 of 2]-Minibatch[ 121- 130 of 320]: * 2560; ce = 1.31290741; err = 0.38593750; TotalTime = 1.36941s; TotalTimePerSample = 0.53493ms; SamplesPerSecond = 1869 + Epoch[ 1 of 2]-Minibatch[ 131- 140 of 320]: * 2560; ce = 1.33027344; err = 0.40039063; 
TotalTime = 1.35049s; TotalTimePerSample = 0.52753ms; SamplesPerSecond = 1895 + Epoch[ 1 of 2]-Minibatch[ 141- 150 of 320]: * 2560; ce = 1.31422729; err = 0.38632813; TotalTime = 1.35598s; TotalTimePerSample = 0.52968ms; SamplesPerSecond = 1887 + Epoch[ 1 of 2]-Minibatch[ 151- 160 of 320]: * 2560; ce = 1.26712646; err = 0.38710937; TotalTime = 1.39183s; TotalTimePerSample = 0.54368ms; SamplesPerSecond = 1839 + Epoch[ 1 of 2]-Minibatch[ 161- 170 of 320]: * 2560; ce = 1.28440247; err = 0.38242188; TotalTime = 1.69816s; TotalTimePerSample = 0.66334ms; SamplesPerSecond = 1507 + Epoch[ 1 of 2]-Minibatch[ 171- 180 of 320]: * 2560; ce = 1.31289368; err = 0.39648438; TotalTime = 1.39519s; TotalTimePerSample = 0.54500ms; SamplesPerSecond = 1834 + Epoch[ 1 of 2]-Minibatch[ 181- 190 of 320]: * 2560; ce = 1.33919067; err = 0.41250000; TotalTime = 1.39722s; TotalTimePerSample = 0.54579ms; SamplesPerSecond = 1832 + Epoch[ 1 of 2]-Minibatch[ 191- 200 of 320]: * 2560; ce = 1.31965942; err = 0.41757813; TotalTime = 1.65563s; TotalTimePerSample = 0.64673ms; SamplesPerSecond = 1546 + Epoch[ 1 of 2]-Minibatch[ 201- 210 of 320]: * 2560; ce = 1.23342590; err = 0.37578125; TotalTime = 1.64381s; TotalTimePerSample = 0.64211ms; SamplesPerSecond = 1557 + Epoch[ 1 of 2]-Minibatch[ 211- 220 of 320]: * 2560; ce = 1.26905212; err = 0.38671875; TotalTime = 1.40643s; TotalTimePerSample = 0.54938ms; SamplesPerSecond = 1820 + Epoch[ 1 of 2]-Minibatch[ 221- 230 of 320]: * 2560; ce = 1.27187195; err = 0.37109375; TotalTime = 1.33750s; TotalTimePerSample = 0.52246ms; SamplesPerSecond = 1914 + Epoch[ 1 of 2]-Minibatch[ 231- 240 of 320]: * 2560; ce = 1.24517822; err = 0.37382813; TotalTime = 1.70417s; TotalTimePerSample = 0.66569ms; SamplesPerSecond = 1502 + Epoch[ 1 of 2]-Minibatch[ 241- 250 of 320]: * 2560; ce = 1.21053467; err = 0.37109375; TotalTime = 1.36920s; TotalTimePerSample = 0.53484ms; SamplesPerSecond = 1869 + Epoch[ 1 of 2]-Minibatch[ 251- 260 of 320]: * 2560; ce = 1.18406067; err = 0.36757812; TotalTime = 1.38623s; TotalTimePerSample = 0.54150ms; SamplesPerSecond = 1846 + Epoch[ 1 of 2]-Minibatch[ 261- 270 of 320]: * 2560; ce = 1.23108521; err = 0.36718750; TotalTime = 1.33867s; TotalTimePerSample = 0.52292ms; SamplesPerSecond = 1912 + Epoch[ 1 of 2]-Minibatch[ 271- 280 of 320]: * 2560; ce = 1.18462524; err = 0.36093750; TotalTime = 1.37471s; TotalTimePerSample = 0.53700ms; SamplesPerSecond = 1862 + Epoch[ 1 of 2]-Minibatch[ 281- 290 of 320]: * 2560; ce = 1.17063904; err = 0.35781250; TotalTime = 1.38296s; TotalTimePerSample = 0.54022ms; SamplesPerSecond = 1851 + Epoch[ 1 of 2]-Minibatch[ 291- 300 of 320]: * 2560; ce = 1.14370728; err = 0.34335938; TotalTime = 1.42403s; TotalTimePerSample = 0.55626ms; SamplesPerSecond = 1797 + Epoch[ 1 of 2]-Minibatch[ 301- 310 of 320]: * 2560; ce = 1.17314453; err = 0.34921875; TotalTime = 1.37369s; TotalTimePerSample = 0.53660ms; SamplesPerSecond = 1863 + Epoch[ 1 of 2]-Minibatch[ 311- 320 of 320]: * 2560; ce = 1.18715210; err = 0.36328125; TotalTime = 1.80418s; TotalTimePerSample = 0.70476ms; SamplesPerSecond = 1418 +Finished Epoch[ 1 of 2]: [Training] ce = 1.5133471; err = 0.42406008; learningRatePerSample = 0.003125000047; EpochTime=49.952198 Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses Starting minibatch loop. 
- Epoch[ 2 of 2]-Minibatch[ 1- 10 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18743801; EvalErrorPrediction = 0.35585937; TotalTime = 1.47891s; TotalTimePerSample = 0.57770ms; SamplesPerSecond = 1731 - Epoch[ 2 of 2]-Minibatch[ 11- 20 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14856281; EvalErrorPrediction = 0.34804687; TotalTime = 1.38199s; TotalTimePerSample = 0.53984ms; SamplesPerSecond = 1852 - Epoch[ 2 of 2]-Minibatch[ 21- 30 of 320]: * 2560; CrossEntropyWithSoftmax = 1.15754395; EvalErrorPrediction = 0.34804687; TotalTime = 1.35597s; TotalTimePerSample = 0.52968ms; SamplesPerSecond = 1887 - Epoch[ 2 of 2]-Minibatch[ 31- 40 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14760780; EvalErrorPrediction = 0.34375000; TotalTime = 1.36131s; TotalTimePerSample = 0.53176ms; SamplesPerSecond = 1880 - Epoch[ 2 of 2]-Minibatch[ 41- 50 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14606209; EvalErrorPrediction = 0.35546875; TotalTime = 1.39984s; TotalTimePerSample = 0.54681ms; SamplesPerSecond = 1828 - Epoch[ 2 of 2]-Minibatch[ 51- 60 of 320]: * 2560; CrossEntropyWithSoftmax = 1.13943710; EvalErrorPrediction = 0.33593750; TotalTime = 1.39025s; TotalTimePerSample = 0.54307ms; SamplesPerSecond = 1841 - Epoch[ 2 of 2]-Minibatch[ 61- 70 of 320]: * 2560; CrossEntropyWithSoftmax = 1.09978561; EvalErrorPrediction = 0.33632812; TotalTime = 1.83114s; TotalTimePerSample = 0.71529ms; SamplesPerSecond = 1398 - Epoch[ 2 of 2]-Minibatch[ 71- 80 of 320]: * 2560; CrossEntropyWithSoftmax = 1.16258240; EvalErrorPrediction = 0.35468750; TotalTime = 1.37695s; TotalTimePerSample = 0.53787ms; SamplesPerSecond = 1859 - Epoch[ 2 of 2]-Minibatch[ 81- 90 of 320]: * 2560; CrossEntropyWithSoftmax = 1.16670074; EvalErrorPrediction = 0.36015625; TotalTime = 1.39585s; TotalTimePerSample = 0.54525ms; SamplesPerSecond = 1834 - Epoch[ 2 of 2]-Minibatch[ 91- 100 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12352142; EvalErrorPrediction = 0.33867188; TotalTime = 1.36493s; TotalTimePerSample = 0.53317ms; SamplesPerSecond = 1875 - Epoch[ 2 of 2]-Minibatch[ 101- 110 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12195129; EvalErrorPrediction = 0.34570313; TotalTime = 1.33980s; TotalTimePerSample = 0.52336ms; SamplesPerSecond = 1910 - Epoch[ 2 of 2]-Minibatch[ 111- 120 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18841553; EvalErrorPrediction = 0.36562500; TotalTime = 1.34158s; TotalTimePerSample = 0.52405ms; SamplesPerSecond = 1908 - Epoch[ 2 of 2]-Minibatch[ 121- 130 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14197235; EvalErrorPrediction = 0.34062500; TotalTime = 1.33997s; TotalTimePerSample = 0.52343ms; SamplesPerSecond = 1910 - Epoch[ 2 of 2]-Minibatch[ 131- 140 of 320]: * 2560; CrossEntropyWithSoftmax = 1.17212372; EvalErrorPrediction = 0.35742188; TotalTime = 1.80645s; TotalTimePerSample = 0.70565ms; SamplesPerSecond = 1417 - Epoch[ 2 of 2]-Minibatch[ 141- 150 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12667999; EvalErrorPrediction = 0.35273437; TotalTime = 1.38029s; TotalTimePerSample = 0.53918ms; SamplesPerSecond = 1854 - Epoch[ 2 of 2]-Minibatch[ 151- 160 of 320]: * 2560; CrossEntropyWithSoftmax = 1.09235840; EvalErrorPrediction = 0.33476563; TotalTime = 1.43088s; TotalTimePerSample = 0.55894ms; SamplesPerSecond = 1789 - Epoch[ 2 of 2]-Minibatch[ 161- 170 of 320]: * 2560; CrossEntropyWithSoftmax = 1.10599670; EvalErrorPrediction = 0.34062500; TotalTime = 1.34357s; TotalTimePerSample = 0.52483ms; SamplesPerSecond = 1905 - Epoch[ 2 of 2]-Minibatch[ 171- 180 of 320]: * 2560; CrossEntropyWithSoftmax = 1.11184387; 
EvalErrorPrediction = 0.33320312; TotalTime = 1.38528s; TotalTimePerSample = 0.54113ms; SamplesPerSecond = 1847 - Epoch[ 2 of 2]-Minibatch[ 181- 190 of 320]: * 2560; CrossEntropyWithSoftmax = 1.10395508; EvalErrorPrediction = 0.34023437; TotalTime = 1.39965s; TotalTimePerSample = 0.54674ms; SamplesPerSecond = 1829 - Epoch[ 2 of 2]-Minibatch[ 191- 200 of 320]: * 2560; CrossEntropyWithSoftmax = 1.06003723; EvalErrorPrediction = 0.32773438; TotalTime = 1.37113s; TotalTimePerSample = 0.53560ms; SamplesPerSecond = 1867 - Epoch[ 2 of 2]-Minibatch[ 201- 210 of 320]: * 2560; CrossEntropyWithSoftmax = 1.10617218; EvalErrorPrediction = 0.33476563; TotalTime = 1.80950s; TotalTimePerSample = 0.70684ms; SamplesPerSecond = 1414 - Epoch[ 2 of 2]-Minibatch[ 211- 220 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14527283; EvalErrorPrediction = 0.35429688; TotalTime = 1.37585s; TotalTimePerSample = 0.53744ms; SamplesPerSecond = 1860 - Epoch[ 2 of 2]-Minibatch[ 221- 230 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12649536; EvalErrorPrediction = 0.34414062; TotalTime = 1.35334s; TotalTimePerSample = 0.52865ms; SamplesPerSecond = 1891 - Epoch[ 2 of 2]-Minibatch[ 231- 240 of 320]: * 2560; CrossEntropyWithSoftmax = 1.11939697; EvalErrorPrediction = 0.33632812; TotalTime = 1.33956s; TotalTimePerSample = 0.52327ms; SamplesPerSecond = 1911 - Epoch[ 2 of 2]-Minibatch[ 241- 250 of 320]: * 2560; CrossEntropyWithSoftmax = 1.11855164; EvalErrorPrediction = 0.34375000; TotalTime = 1.36494s; TotalTimePerSample = 0.53318ms; SamplesPerSecond = 1875 - Epoch[ 2 of 2]-Minibatch[ 251- 260 of 320]: * 2560; CrossEntropyWithSoftmax = 1.04503174; EvalErrorPrediction = 0.32539062; TotalTime = 1.38926s; TotalTimePerSample = 0.54268ms; SamplesPerSecond = 1842 - Epoch[ 2 of 2]-Minibatch[ 261- 270 of 320]: * 2560; CrossEntropyWithSoftmax = 1.04819031; EvalErrorPrediction = 0.33281250; TotalTime = 1.36162s; TotalTimePerSample = 0.53188ms; SamplesPerSecond = 1880 - Epoch[ 2 of 2]-Minibatch[ 271- 280 of 320]: * 2560; CrossEntropyWithSoftmax = 1.03961487; EvalErrorPrediction = 0.32070312; TotalTime = 1.37349s; TotalTimePerSample = 0.53652ms; SamplesPerSecond = 1863 - Epoch[ 2 of 2]-Minibatch[ 281- 290 of 320]: * 2560; CrossEntropyWithSoftmax = 1.07194214; EvalErrorPrediction = 0.33242187; TotalTime = 1.81902s; TotalTimePerSample = 0.71056ms; SamplesPerSecond = 1407 - Epoch[ 2 of 2]-Minibatch[ 291- 300 of 320]: * 2560; CrossEntropyWithSoftmax = 1.11237183; EvalErrorPrediction = 0.34335938; TotalTime = 1.36152s; TotalTimePerSample = 0.53184ms; SamplesPerSecond = 1880 - Epoch[ 2 of 2]-Minibatch[ 301- 310 of 320]: * 2560; CrossEntropyWithSoftmax = 1.08430786; EvalErrorPrediction = 0.32890625; TotalTime = 1.43272s; TotalTimePerSample = 0.55965ms; SamplesPerSecond = 1786 - Epoch[ 2 of 2]-Minibatch[ 311- 320 of 320]: * 2560; CrossEntropyWithSoftmax = 1.05424194; EvalErrorPrediction = 0.31484375; TotalTime = 1.34370s; TotalTimePerSample = 0.52488ms; SamplesPerSecond = 1905 -Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.1180052; EvalErrorPrediction = 0.34147951; learningRatePerSample = 0.003125000047; EpochTime=45.82816 + Epoch[ 2 of 2]-Minibatch[ 1- 10 of 320]: * 2560; ce = 1.18743801; err = 0.35585937; TotalTime = 1.47891s; TotalTimePerSample = 0.57770ms; SamplesPerSecond = 1731 + Epoch[ 2 of 2]-Minibatch[ 11- 20 of 320]: * 2560; ce = 1.14856281; err = 0.34804687; TotalTime = 1.38199s; TotalTimePerSample = 0.53984ms; SamplesPerSecond = 1852 + Epoch[ 2 of 2]-Minibatch[ 21- 30 of 320]: * 2560; ce = 1.15754395; err = 0.34804687; 
TotalTime = 1.35597s; TotalTimePerSample = 0.52968ms; SamplesPerSecond = 1887 + Epoch[ 2 of 2]-Minibatch[ 31- 40 of 320]: * 2560; ce = 1.14760780; err = 0.34375000; TotalTime = 1.36131s; TotalTimePerSample = 0.53176ms; SamplesPerSecond = 1880 + Epoch[ 2 of 2]-Minibatch[ 41- 50 of 320]: * 2560; ce = 1.14606209; err = 0.35546875; TotalTime = 1.39984s; TotalTimePerSample = 0.54681ms; SamplesPerSecond = 1828 + Epoch[ 2 of 2]-Minibatch[ 51- 60 of 320]: * 2560; ce = 1.13943710; err = 0.33593750; TotalTime = 1.39025s; TotalTimePerSample = 0.54307ms; SamplesPerSecond = 1841 + Epoch[ 2 of 2]-Minibatch[ 61- 70 of 320]: * 2560; ce = 1.09978561; err = 0.33632812; TotalTime = 1.83114s; TotalTimePerSample = 0.71529ms; SamplesPerSecond = 1398 + Epoch[ 2 of 2]-Minibatch[ 71- 80 of 320]: * 2560; ce = 1.16258240; err = 0.35468750; TotalTime = 1.37695s; TotalTimePerSample = 0.53787ms; SamplesPerSecond = 1859 + Epoch[ 2 of 2]-Minibatch[ 81- 90 of 320]: * 2560; ce = 1.16670074; err = 0.36015625; TotalTime = 1.39585s; TotalTimePerSample = 0.54525ms; SamplesPerSecond = 1834 + Epoch[ 2 of 2]-Minibatch[ 91- 100 of 320]: * 2560; ce = 1.12352142; err = 0.33867188; TotalTime = 1.36493s; TotalTimePerSample = 0.53317ms; SamplesPerSecond = 1875 + Epoch[ 2 of 2]-Minibatch[ 101- 110 of 320]: * 2560; ce = 1.12195129; err = 0.34570313; TotalTime = 1.33980s; TotalTimePerSample = 0.52336ms; SamplesPerSecond = 1910 + Epoch[ 2 of 2]-Minibatch[ 111- 120 of 320]: * 2560; ce = 1.18841553; err = 0.36562500; TotalTime = 1.34158s; TotalTimePerSample = 0.52405ms; SamplesPerSecond = 1908 + Epoch[ 2 of 2]-Minibatch[ 121- 130 of 320]: * 2560; ce = 1.14197235; err = 0.34062500; TotalTime = 1.33997s; TotalTimePerSample = 0.52343ms; SamplesPerSecond = 1910 + Epoch[ 2 of 2]-Minibatch[ 131- 140 of 320]: * 2560; ce = 1.17212372; err = 0.35742188; TotalTime = 1.80645s; TotalTimePerSample = 0.70565ms; SamplesPerSecond = 1417 + Epoch[ 2 of 2]-Minibatch[ 141- 150 of 320]: * 2560; ce = 1.12667999; err = 0.35273437; TotalTime = 1.38029s; TotalTimePerSample = 0.53918ms; SamplesPerSecond = 1854 + Epoch[ 2 of 2]-Minibatch[ 151- 160 of 320]: * 2560; ce = 1.09235840; err = 0.33476563; TotalTime = 1.43088s; TotalTimePerSample = 0.55894ms; SamplesPerSecond = 1789 + Epoch[ 2 of 2]-Minibatch[ 161- 170 of 320]: * 2560; ce = 1.10599670; err = 0.34062500; TotalTime = 1.34357s; TotalTimePerSample = 0.52483ms; SamplesPerSecond = 1905 + Epoch[ 2 of 2]-Minibatch[ 171- 180 of 320]: * 2560; ce = 1.11184387; err = 0.33320312; TotalTime = 1.38528s; TotalTimePerSample = 0.54113ms; SamplesPerSecond = 1847 + Epoch[ 2 of 2]-Minibatch[ 181- 190 of 320]: * 2560; ce = 1.10395508; err = 0.34023437; TotalTime = 1.39965s; TotalTimePerSample = 0.54674ms; SamplesPerSecond = 1829 + Epoch[ 2 of 2]-Minibatch[ 191- 200 of 320]: * 2560; ce = 1.06003723; err = 0.32773438; TotalTime = 1.37113s; TotalTimePerSample = 0.53560ms; SamplesPerSecond = 1867 + Epoch[ 2 of 2]-Minibatch[ 201- 210 of 320]: * 2560; ce = 1.10617218; err = 0.33476563; TotalTime = 1.80950s; TotalTimePerSample = 0.70684ms; SamplesPerSecond = 1414 + Epoch[ 2 of 2]-Minibatch[ 211- 220 of 320]: * 2560; ce = 1.14527283; err = 0.35429688; TotalTime = 1.37585s; TotalTimePerSample = 0.53744ms; SamplesPerSecond = 1860 + Epoch[ 2 of 2]-Minibatch[ 221- 230 of 320]: * 2560; ce = 1.12649536; err = 0.34414062; TotalTime = 1.35334s; TotalTimePerSample = 0.52865ms; SamplesPerSecond = 1891 + Epoch[ 2 of 2]-Minibatch[ 231- 240 of 320]: * 2560; ce = 1.11939697; err = 0.33632812; TotalTime = 1.33956s; TotalTimePerSample = 0.52327ms; 
SamplesPerSecond = 1911 + Epoch[ 2 of 2]-Minibatch[ 241- 250 of 320]: * 2560; ce = 1.11855164; err = 0.34375000; TotalTime = 1.36494s; TotalTimePerSample = 0.53318ms; SamplesPerSecond = 1875 + Epoch[ 2 of 2]-Minibatch[ 251- 260 of 320]: * 2560; ce = 1.04503174; err = 0.32539062; TotalTime = 1.38926s; TotalTimePerSample = 0.54268ms; SamplesPerSecond = 1842 + Epoch[ 2 of 2]-Minibatch[ 261- 270 of 320]: * 2560; ce = 1.04819031; err = 0.33281250; TotalTime = 1.36162s; TotalTimePerSample = 0.53188ms; SamplesPerSecond = 1880 + Epoch[ 2 of 2]-Minibatch[ 271- 280 of 320]: * 2560; ce = 1.03961487; err = 0.32070312; TotalTime = 1.37349s; TotalTimePerSample = 0.53652ms; SamplesPerSecond = 1863 + Epoch[ 2 of 2]-Minibatch[ 281- 290 of 320]: * 2560; ce = 1.07194214; err = 0.33242187; TotalTime = 1.81902s; TotalTimePerSample = 0.71056ms; SamplesPerSecond = 1407 + Epoch[ 2 of 2]-Minibatch[ 291- 300 of 320]: * 2560; ce = 1.11237183; err = 0.34335938; TotalTime = 1.36152s; TotalTimePerSample = 0.53184ms; SamplesPerSecond = 1880 + Epoch[ 2 of 2]-Minibatch[ 301- 310 of 320]: * 2560; ce = 1.08430786; err = 0.32890625; TotalTime = 1.43272s; TotalTimePerSample = 0.55965ms; SamplesPerSecond = 1786 + Epoch[ 2 of 2]-Minibatch[ 311- 320 of 320]: * 2560; ce = 1.05424194; err = 0.31484375; TotalTime = 1.34370s; TotalTimePerSample = 0.52488ms; SamplesPerSecond = 1905 +Finished Epoch[ 2 of 2]: [Training] ce = 1.1180052; err = 0.34147951; learningRatePerSample = 0.003125000047; EpochTime=45.82816 CNTKCommandTrainEnd: DPT_Pre2 @@ -3177,101 +3177,101 @@ minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms Starting minibatch loop. - Epoch[ 1 of 4]-Minibatch[ 1- 10 of 320]: * 2560; CrossEntropyWithSoftmax = 4.17336311; EvalErrorPrediction = 0.84023437; TotalTime = 2.06124s; TotalTimePerSample = 0.80517ms; SamplesPerSecond = 1241 - Epoch[ 1 of 4]-Minibatch[ 11- 20 of 320]: * 2560; CrossEntropyWithSoftmax = 2.58318291; EvalErrorPrediction = 0.64453125; TotalTime = 1.93110s; TotalTimePerSample = 0.75433ms; SamplesPerSecond = 1325 - Epoch[ 1 of 4]-Minibatch[ 21- 30 of 320]: * 2560; CrossEntropyWithSoftmax = 2.05603027; EvalErrorPrediction = 0.54648438; TotalTime = 1.90603s; TotalTimePerSample = 0.74454ms; SamplesPerSecond = 1343 - Epoch[ 1 of 4]-Minibatch[ 31- 40 of 320]: * 2560; CrossEntropyWithSoftmax = 1.74777603; EvalErrorPrediction = 0.47773437; TotalTime = 1.91457s; TotalTimePerSample = 0.74788ms; SamplesPerSecond = 1337 - Epoch[ 1 of 4]-Minibatch[ 41- 50 of 320]: * 2560; CrossEntropyWithSoftmax = 1.54688110; EvalErrorPrediction = 0.43867187; TotalTime = 2.54444s; TotalTimePerSample = 0.99392ms; SamplesPerSecond = 1006 - Epoch[ 1 of 4]-Minibatch[ 51- 60 of 320]: * 2560; CrossEntropyWithSoftmax = 1.44241409; EvalErrorPrediction = 0.41406250; TotalTime = 2.03288s; TotalTimePerSample = 0.79410ms; SamplesPerSecond = 1259 - Epoch[ 1 of 4]-Minibatch[ 61- 70 of 320]: * 2560; CrossEntropyWithSoftmax = 1.35854034; EvalErrorPrediction = 0.40312500; TotalTime = 2.14522s; TotalTimePerSample = 0.83798ms; SamplesPerSecond = 1193 - Epoch[ 1 of 4]-Minibatch[ 71- 80 of 320]: * 2560; CrossEntropyWithSoftmax = 1.35585175; EvalErrorPrediction = 0.39531250; TotalTime = 1.92814s; TotalTimePerSample = 0.75318ms; SamplesPerSecond = 1327 - Epoch[ 1 of 4]-Minibatch[ 81- 90 of 320]: * 2560; CrossEntropyWithSoftmax = 1.33494263; EvalErrorPrediction = 0.38789062; TotalTime = 2.00338s; TotalTimePerSample = 0.78257ms; SamplesPerSecond = 
1277 - Epoch[ 1 of 4]-Minibatch[ 91- 100 of 320]: * 2560; CrossEntropyWithSoftmax = 1.30348053; EvalErrorPrediction = 0.38320312; TotalTime = 2.44259s; TotalTimePerSample = 0.95414ms; SamplesPerSecond = 1048 - Epoch[ 1 of 4]-Minibatch[ 101- 110 of 320]: * 2560; CrossEntropyWithSoftmax = 1.30501862; EvalErrorPrediction = 0.38085938; TotalTime = 1.92867s; TotalTimePerSample = 0.75339ms; SamplesPerSecond = 1327 - Epoch[ 1 of 4]-Minibatch[ 111- 120 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23170624; EvalErrorPrediction = 0.36640625; TotalTime = 3.54710s; TotalTimePerSample = 1.38558ms; SamplesPerSecond = 721 - Epoch[ 1 of 4]-Minibatch[ 121- 130 of 320]: * 2560; CrossEntropyWithSoftmax = 1.20843506; EvalErrorPrediction = 0.35546875; TotalTime = 2.56219s; TotalTimePerSample = 1.00086ms; SamplesPerSecond = 999 - Epoch[ 1 of 4]-Minibatch[ 131- 140 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23209686; EvalErrorPrediction = 0.36601563; TotalTime = 2.63928s; TotalTimePerSample = 1.03097ms; SamplesPerSecond = 969 - Epoch[ 1 of 4]-Minibatch[ 141- 150 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22095490; EvalErrorPrediction = 0.37070313; TotalTime = 1.94429s; TotalTimePerSample = 0.75949ms; SamplesPerSecond = 1316 - Epoch[ 1 of 4]-Minibatch[ 151- 160 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18400879; EvalErrorPrediction = 0.35156250; TotalTime = 1.94206s; TotalTimePerSample = 0.75862ms; SamplesPerSecond = 1318 - Epoch[ 1 of 4]-Minibatch[ 161- 170 of 320]: * 2560; CrossEntropyWithSoftmax = 1.20606384; EvalErrorPrediction = 0.35976562; TotalTime = 1.91605s; TotalTimePerSample = 0.74846ms; SamplesPerSecond = 1336 - Epoch[ 1 of 4]-Minibatch[ 171- 180 of 320]: * 2560; CrossEntropyWithSoftmax = 1.23779907; EvalErrorPrediction = 0.37617187; TotalTime = 1.92765s; TotalTimePerSample = 0.75299ms; SamplesPerSecond = 1328 - Epoch[ 1 of 4]-Minibatch[ 181- 190 of 320]: * 2560; CrossEntropyWithSoftmax = 1.26740112; EvalErrorPrediction = 0.37851563; TotalTime = 2.45290s; TotalTimePerSample = 0.95817ms; SamplesPerSecond = 1043 - Epoch[ 1 of 4]-Minibatch[ 191- 200 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22087402; EvalErrorPrediction = 0.38046875; TotalTime = 1.94683s; TotalTimePerSample = 0.76048ms; SamplesPerSecond = 1314 - Epoch[ 1 of 4]-Minibatch[ 201- 210 of 320]: * 2560; CrossEntropyWithSoftmax = 1.18200684; EvalErrorPrediction = 0.36210938; TotalTime = 1.92878s; TotalTimePerSample = 0.75343ms; SamplesPerSecond = 1327 - Epoch[ 1 of 4]-Minibatch[ 211- 220 of 320]: * 2560; CrossEntropyWithSoftmax = 1.19344177; EvalErrorPrediction = 0.37070313; TotalTime = 1.94856s; TotalTimePerSample = 0.76116ms; SamplesPerSecond = 1313 - Epoch[ 1 of 4]-Minibatch[ 221- 230 of 320]: * 2560; CrossEntropyWithSoftmax = 1.22325745; EvalErrorPrediction = 0.35468750; TotalTime = 2.01215s; TotalTimePerSample = 0.78600ms; SamplesPerSecond = 1272 - Epoch[ 1 of 4]-Minibatch[ 231- 240 of 320]: * 2560; CrossEntropyWithSoftmax = 1.19638977; EvalErrorPrediction = 0.35898438; TotalTime = 2.45478s; TotalTimePerSample = 0.95890ms; SamplesPerSecond = 1042 - Epoch[ 1 of 4]-Minibatch[ 241- 250 of 320]: * 2560; CrossEntropyWithSoftmax = 1.16811523; EvalErrorPrediction = 0.36562500; TotalTime = 2.04822s; TotalTimePerSample = 0.80009ms; SamplesPerSecond = 1249 - Epoch[ 1 of 4]-Minibatch[ 251- 260 of 320]: * 2560; CrossEntropyWithSoftmax = 1.12518311; EvalErrorPrediction = 0.34765625; TotalTime = 1.96534s; TotalTimePerSample = 0.76771ms; SamplesPerSecond = 1302 - Epoch[ 1 of 4]-Minibatch[ 261- 270 of 320]: * 2560; CrossEntropyWithSoftmax = 
1.18634338; EvalErrorPrediction = 0.35312500; TotalTime = 1.99261s; TotalTimePerSample = 0.77836ms; SamplesPerSecond = 1284 - Epoch[ 1 of 4]-Minibatch[ 271- 280 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14113159; EvalErrorPrediction = 0.35000000; TotalTime = 1.92141s; TotalTimePerSample = 0.75055ms; SamplesPerSecond = 1332 - Epoch[ 1 of 4]-Minibatch[ 281- 290 of 320]: * 2560; CrossEntropyWithSoftmax = 1.14277954; EvalErrorPrediction = 0.34414062; TotalTime = 2.31260s; TotalTimePerSample = 0.90336ms; SamplesPerSecond = 1106 - Epoch[ 1 of 4]-Minibatch[ 291- 300 of 320]: * 2560; CrossEntropyWithSoftmax = 1.11815796; EvalErrorPrediction = 0.33867188; TotalTime = 1.95541s; TotalTimePerSample = 0.76383ms; SamplesPerSecond = 1309 - Epoch[ 1 of 4]-Minibatch[ 301- 310 of 320]: * 2560; CrossEntropyWithSoftmax = 1.13609314; EvalErrorPrediction = 0.34140625; TotalTime = 1.91000s; TotalTimePerSample = 0.74609ms; SamplesPerSecond = 1340 - Epoch[ 1 of 4]-Minibatch[ 311- 320 of 320]: * 2560; CrossEntropyWithSoftmax = 1.13069458; EvalErrorPrediction = 0.34609375; TotalTime = 1.90057s; TotalTimePerSample = 0.74241ms; SamplesPerSecond = 1346 -Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 1.411263; EvalErrorPrediction = 0.4015747; learningRatePerSample = 0.003125000047; EpochTime=71.007888 + Epoch[ 1 of 4]-Minibatch[ 1- 10 of 320]: * 2560; ce = 4.17336311; err = 0.84023437; TotalTime = 2.06124s; TotalTimePerSample = 0.80517ms; SamplesPerSecond = 1241 + Epoch[ 1 of 4]-Minibatch[ 11- 20 of 320]: * 2560; ce = 2.58318291; err = 0.64453125; TotalTime = 1.93110s; TotalTimePerSample = 0.75433ms; SamplesPerSecond = 1325 + Epoch[ 1 of 4]-Minibatch[ 21- 30 of 320]: * 2560; ce = 2.05603027; err = 0.54648438; TotalTime = 1.90603s; TotalTimePerSample = 0.74454ms; SamplesPerSecond = 1343 + Epoch[ 1 of 4]-Minibatch[ 31- 40 of 320]: * 2560; ce = 1.74777603; err = 0.47773437; TotalTime = 1.91457s; TotalTimePerSample = 0.74788ms; SamplesPerSecond = 1337 + Epoch[ 1 of 4]-Minibatch[ 41- 50 of 320]: * 2560; ce = 1.54688110; err = 0.43867187; TotalTime = 2.54444s; TotalTimePerSample = 0.99392ms; SamplesPerSecond = 1006 + Epoch[ 1 of 4]-Minibatch[ 51- 60 of 320]: * 2560; ce = 1.44241409; err = 0.41406250; TotalTime = 2.03288s; TotalTimePerSample = 0.79410ms; SamplesPerSecond = 1259 + Epoch[ 1 of 4]-Minibatch[ 61- 70 of 320]: * 2560; ce = 1.35854034; err = 0.40312500; TotalTime = 2.14522s; TotalTimePerSample = 0.83798ms; SamplesPerSecond = 1193 + Epoch[ 1 of 4]-Minibatch[ 71- 80 of 320]: * 2560; ce = 1.35585175; err = 0.39531250; TotalTime = 1.92814s; TotalTimePerSample = 0.75318ms; SamplesPerSecond = 1327 + Epoch[ 1 of 4]-Minibatch[ 81- 90 of 320]: * 2560; ce = 1.33494263; err = 0.38789062; TotalTime = 2.00338s; TotalTimePerSample = 0.78257ms; SamplesPerSecond = 1277 + Epoch[ 1 of 4]-Minibatch[ 91- 100 of 320]: * 2560; ce = 1.30348053; err = 0.38320312; TotalTime = 2.44259s; TotalTimePerSample = 0.95414ms; SamplesPerSecond = 1048 + Epoch[ 1 of 4]-Minibatch[ 101- 110 of 320]: * 2560; ce = 1.30501862; err = 0.38085938; TotalTime = 1.92867s; TotalTimePerSample = 0.75339ms; SamplesPerSecond = 1327 + Epoch[ 1 of 4]-Minibatch[ 111- 120 of 320]: * 2560; ce = 1.23170624; err = 0.36640625; TotalTime = 3.54710s; TotalTimePerSample = 1.38558ms; SamplesPerSecond = 721 + Epoch[ 1 of 4]-Minibatch[ 121- 130 of 320]: * 2560; ce = 1.20843506; err = 0.35546875; TotalTime = 2.56219s; TotalTimePerSample = 1.00086ms; SamplesPerSecond = 999 + Epoch[ 1 of 4]-Minibatch[ 131- 140 of 320]: * 2560; ce = 1.23209686; err = 0.36601563; 
TotalTime = 2.63928s; TotalTimePerSample = 1.03097ms; SamplesPerSecond = 969 + Epoch[ 1 of 4]-Minibatch[ 141- 150 of 320]: * 2560; ce = 1.22095490; err = 0.37070313; TotalTime = 1.94429s; TotalTimePerSample = 0.75949ms; SamplesPerSecond = 1316 + Epoch[ 1 of 4]-Minibatch[ 151- 160 of 320]: * 2560; ce = 1.18400879; err = 0.35156250; TotalTime = 1.94206s; TotalTimePerSample = 0.75862ms; SamplesPerSecond = 1318 + Epoch[ 1 of 4]-Minibatch[ 161- 170 of 320]: * 2560; ce = 1.20606384; err = 0.35976562; TotalTime = 1.91605s; TotalTimePerSample = 0.74846ms; SamplesPerSecond = 1336 + Epoch[ 1 of 4]-Minibatch[ 171- 180 of 320]: * 2560; ce = 1.23779907; err = 0.37617187; TotalTime = 1.92765s; TotalTimePerSample = 0.75299ms; SamplesPerSecond = 1328 + Epoch[ 1 of 4]-Minibatch[ 181- 190 of 320]: * 2560; ce = 1.26740112; err = 0.37851563; TotalTime = 2.45290s; TotalTimePerSample = 0.95817ms; SamplesPerSecond = 1043 + Epoch[ 1 of 4]-Minibatch[ 191- 200 of 320]: * 2560; ce = 1.22087402; err = 0.38046875; TotalTime = 1.94683s; TotalTimePerSample = 0.76048ms; SamplesPerSecond = 1314 + Epoch[ 1 of 4]-Minibatch[ 201- 210 of 320]: * 2560; ce = 1.18200684; err = 0.36210938; TotalTime = 1.92878s; TotalTimePerSample = 0.75343ms; SamplesPerSecond = 1327 + Epoch[ 1 of 4]-Minibatch[ 211- 220 of 320]: * 2560; ce = 1.19344177; err = 0.37070313; TotalTime = 1.94856s; TotalTimePerSample = 0.76116ms; SamplesPerSecond = 1313 + Epoch[ 1 of 4]-Minibatch[ 221- 230 of 320]: * 2560; ce = 1.22325745; err = 0.35468750; TotalTime = 2.01215s; TotalTimePerSample = 0.78600ms; SamplesPerSecond = 1272 + Epoch[ 1 of 4]-Minibatch[ 231- 240 of 320]: * 2560; ce = 1.19638977; err = 0.35898438; TotalTime = 2.45478s; TotalTimePerSample = 0.95890ms; SamplesPerSecond = 1042 + Epoch[ 1 of 4]-Minibatch[ 241- 250 of 320]: * 2560; ce = 1.16811523; err = 0.36562500; TotalTime = 2.04822s; TotalTimePerSample = 0.80009ms; SamplesPerSecond = 1249 + Epoch[ 1 of 4]-Minibatch[ 251- 260 of 320]: * 2560; ce = 1.12518311; err = 0.34765625; TotalTime = 1.96534s; TotalTimePerSample = 0.76771ms; SamplesPerSecond = 1302 + Epoch[ 1 of 4]-Minibatch[ 261- 270 of 320]: * 2560; ce = 1.18634338; err = 0.35312500; TotalTime = 1.99261s; TotalTimePerSample = 0.77836ms; SamplesPerSecond = 1284 + Epoch[ 1 of 4]-Minibatch[ 271- 280 of 320]: * 2560; ce = 1.14113159; err = 0.35000000; TotalTime = 1.92141s; TotalTimePerSample = 0.75055ms; SamplesPerSecond = 1332 + Epoch[ 1 of 4]-Minibatch[ 281- 290 of 320]: * 2560; ce = 1.14277954; err = 0.34414062; TotalTime = 2.31260s; TotalTimePerSample = 0.90336ms; SamplesPerSecond = 1106 + Epoch[ 1 of 4]-Minibatch[ 291- 300 of 320]: * 2560; ce = 1.11815796; err = 0.33867188; TotalTime = 1.95541s; TotalTimePerSample = 0.76383ms; SamplesPerSecond = 1309 + Epoch[ 1 of 4]-Minibatch[ 301- 310 of 320]: * 2560; ce = 1.13609314; err = 0.34140625; TotalTime = 1.91000s; TotalTimePerSample = 0.74609ms; SamplesPerSecond = 1340 + Epoch[ 1 of 4]-Minibatch[ 311- 320 of 320]: * 2560; ce = 1.13069458; err = 0.34609375; TotalTime = 1.90057s; TotalTimePerSample = 0.74241ms; SamplesPerSecond = 1346 +Finished Epoch[ 1 of 4]: [Training] ce = 1.411263; err = 0.4015747; learningRatePerSample = 0.003125000047; EpochTime=71.007888 Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.810210 minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses Starting minibatch loop. 
- Epoch[ 2 of 4]-Minibatch[ 1- 10 of 160]: * 5120; CrossEntropyWithSoftmax = 1.20858374; EvalErrorPrediction = 0.36269531; TotalTime = 2.60521s; TotalTimePerSample = 0.50883ms; SamplesPerSecond = 1965
- Epoch[ 2 of 4]-Minibatch[ 11- 20 of 160]: * 5120; CrossEntropyWithSoftmax = 1.15259104; EvalErrorPrediction = 0.34609375; TotalTime = 2.83770s; TotalTimePerSample = 0.55424ms; SamplesPerSecond = 1804
- Epoch[ 2 of 4]-Minibatch[ 21- 30 of 160]: * 5120; CrossEntropyWithSoftmax = 1.11579819; EvalErrorPrediction = 0.34238281; TotalTime = 2.41000s; TotalTimePerSample = 0.47070ms; SamplesPerSecond = 2124
- Epoch[ 2 of 4]-Minibatch[ 31- 40 of 160]: * 5120; CrossEntropyWithSoftmax = 1.11495934; EvalErrorPrediction = 0.34570313; TotalTime = 2.45457s; TotalTimePerSample = 0.47941ms; SamplesPerSecond = 2085
- Epoch[ 2 of 4]-Minibatch[ 41- 50 of 160]: * 5120; CrossEntropyWithSoftmax = 1.19023743; EvalErrorPrediction = 0.36894531; TotalTime = 2.40870s; TotalTimePerSample = 0.47045ms; SamplesPerSecond = 2125
- Epoch[ 2 of 4]-Minibatch[ 51- 60 of 160]: * 5120; CrossEntropyWithSoftmax = 1.18024330; EvalErrorPrediction = 0.36210938; TotalTime = 2.75762s; TotalTimePerSample = 0.53860ms; SamplesPerSecond = 1856
- Epoch[ 2 of 4]-Minibatch[ 61- 70 of 160]: * 5120; CrossEntropyWithSoftmax = 1.16740799; EvalErrorPrediction = 0.35175781; TotalTime = 2.46142s; TotalTimePerSample = 0.48075ms; SamplesPerSecond = 2080
- Epoch[ 2 of 4]-Minibatch[ 71- 80 of 160]: * 5120; CrossEntropyWithSoftmax = 1.09000397; EvalErrorPrediction = 0.34121094; TotalTime = 2.43527s; TotalTimePerSample = 0.47564ms; SamplesPerSecond = 2102
- Epoch[ 2 of 4]-Minibatch[ 81- 90 of 160]: * 5120; CrossEntropyWithSoftmax = 1.09481125; EvalErrorPrediction = 0.33378906; TotalTime = 2.46109s; TotalTimePerSample = 0.48068ms; SamplesPerSecond = 2080
- Epoch[ 2 of 4]-Minibatch[ 91- 100 of 160]: * 5120; CrossEntropyWithSoftmax = 1.06353989; EvalErrorPrediction = 0.32480469; TotalTime = 2.90383s; TotalTimePerSample = 0.56715ms; SamplesPerSecond = 1763
- Epoch[ 2 of 4]-Minibatch[ 101- 110 of 160]: * 5120; CrossEntropyWithSoftmax = 1.11828995; EvalErrorPrediction = 0.34199219; TotalTime = 2.37363s; TotalTimePerSample = 0.46360ms; SamplesPerSecond = 2157
- Epoch[ 2 of 4]-Minibatch[ 111- 120 of 160]: * 5120; CrossEntropyWithSoftmax = 1.15714111; EvalErrorPrediction = 0.35742188; TotalTime = 2.41495s; TotalTimePerSample = 0.47167ms; SamplesPerSecond = 2120
- Epoch[ 2 of 4]-Minibatch[ 121- 130 of 160]: * 5120; CrossEntropyWithSoftmax = 1.06128235; EvalErrorPrediction = 0.32871094; TotalTime = 2.42177s; TotalTimePerSample = 0.47300ms; SamplesPerSecond = 2114
- Epoch[ 2 of 4]-Minibatch[ 131- 140 of 160]: * 5120; CrossEntropyWithSoftmax = 1.03393250; EvalErrorPrediction = 0.32167969; TotalTime = 2.86138s; TotalTimePerSample = 0.55886ms; SamplesPerSecond = 1789
- Epoch[ 2 of 4]-Minibatch[ 141- 150 of 160]: * 5120; CrossEntropyWithSoftmax = 1.09766846; EvalErrorPrediction = 0.33828125; TotalTime = 2.49456s; TotalTimePerSample = 0.48722ms; SamplesPerSecond = 2052
- Epoch[ 2 of 4]-Minibatch[ 151- 160 of 160]: * 5120; CrossEntropyWithSoftmax = 1.07232513; EvalErrorPrediction = 0.32617188; TotalTime = 2.62645s; TotalTimePerSample = 0.51298ms; SamplesPerSecond = 1949
-Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 1.119926; EvalErrorPrediction = 0.34335938; learningRatePerSample = 0.003125000047; EpochTime=40.972593
+ Epoch[ 2 of 4]-Minibatch[ 1- 10 of 160]: * 5120; ce = 1.20858374; err = 0.36269531; TotalTime = 2.60521s; TotalTimePerSample = 0.50883ms; SamplesPerSecond = 1965
+ Epoch[ 2 of 4]-Minibatch[ 11- 20 of 160]: * 5120; ce = 1.15259104; err = 0.34609375; TotalTime = 2.83770s; TotalTimePerSample = 0.55424ms; SamplesPerSecond = 1804
+ Epoch[ 2 of 4]-Minibatch[ 21- 30 of 160]: * 5120; ce = 1.11579819; err = 0.34238281; TotalTime = 2.41000s; TotalTimePerSample = 0.47070ms; SamplesPerSecond = 2124
+ Epoch[ 2 of 4]-Minibatch[ 31- 40 of 160]: * 5120; ce = 1.11495934; err = 0.34570313; TotalTime = 2.45457s; TotalTimePerSample = 0.47941ms; SamplesPerSecond = 2085
+ Epoch[ 2 of 4]-Minibatch[ 41- 50 of 160]: * 5120; ce = 1.19023743; err = 0.36894531; TotalTime = 2.40870s; TotalTimePerSample = 0.47045ms; SamplesPerSecond = 2125
+ Epoch[ 2 of 4]-Minibatch[ 51- 60 of 160]: * 5120; ce = 1.18024330; err = 0.36210938; TotalTime = 2.75762s; TotalTimePerSample = 0.53860ms; SamplesPerSecond = 1856
+ Epoch[ 2 of 4]-Minibatch[ 61- 70 of 160]: * 5120; ce = 1.16740799; err = 0.35175781; TotalTime = 2.46142s; TotalTimePerSample = 0.48075ms; SamplesPerSecond = 2080
+ Epoch[ 2 of 4]-Minibatch[ 71- 80 of 160]: * 5120; ce = 1.09000397; err = 0.34121094; TotalTime = 2.43527s; TotalTimePerSample = 0.47564ms; SamplesPerSecond = 2102
+ Epoch[ 2 of 4]-Minibatch[ 81- 90 of 160]: * 5120; ce = 1.09481125; err = 0.33378906; TotalTime = 2.46109s; TotalTimePerSample = 0.48068ms; SamplesPerSecond = 2080
+ Epoch[ 2 of 4]-Minibatch[ 91- 100 of 160]: * 5120; ce = 1.06353989; err = 0.32480469; TotalTime = 2.90383s; TotalTimePerSample = 0.56715ms; SamplesPerSecond = 1763
+ Epoch[ 2 of 4]-Minibatch[ 101- 110 of 160]: * 5120; ce = 1.11828995; err = 0.34199219; TotalTime = 2.37363s; TotalTimePerSample = 0.46360ms; SamplesPerSecond = 2157
+ Epoch[ 2 of 4]-Minibatch[ 111- 120 of 160]: * 5120; ce = 1.15714111; err = 0.35742188; TotalTime = 2.41495s; TotalTimePerSample = 0.47167ms; SamplesPerSecond = 2120
+ Epoch[ 2 of 4]-Minibatch[ 121- 130 of 160]: * 5120; ce = 1.06128235; err = 0.32871094; TotalTime = 2.42177s; TotalTimePerSample = 0.47300ms; SamplesPerSecond = 2114
+ Epoch[ 2 of 4]-Minibatch[ 131- 140 of 160]: * 5120; ce = 1.03393250; err = 0.32167969; TotalTime = 2.86138s; TotalTimePerSample = 0.55886ms; SamplesPerSecond = 1789
+ Epoch[ 2 of 4]-Minibatch[ 141- 150 of 160]: * 5120; ce = 1.09766846; err = 0.33828125; TotalTime = 2.49456s; TotalTimePerSample = 0.48722ms; SamplesPerSecond = 2052
+ Epoch[ 2 of 4]-Minibatch[ 151- 160 of 160]: * 5120; ce = 1.07232513; err = 0.32617188; TotalTime = 2.62645s; TotalTimePerSample = 0.51298ms; SamplesPerSecond = 1949
+Finished Epoch[ 2 of 4]: [Training] ce = 1.119926; err = 0.34335938; learningRatePerSample = 0.003125000047; EpochTime=40.972593
Starting Epoch 3: learning rate per sample = 0.003125 effective momentum = 0.810210
minibatchiterator: epoch 2: frames [163840..245760] (first utterance at frame 163840), data subset 0 of 1, with 1 datapasses
Starting minibatch loop.
- Epoch[ 3 of 4]-Minibatch[ 1- 10 of 160]: * 5120; CrossEntropyWithSoftmax = 1.12369976; EvalErrorPrediction = 0.34121094; TotalTime = 2.71170s; TotalTimePerSample = 0.52963ms; SamplesPerSecond = 1888
- Epoch[ 3 of 4]-Minibatch[ 11- 20 of 160]: * 5120; CrossEntropyWithSoftmax = 1.08172264; EvalErrorPrediction = 0.33535156; TotalTime = 3.00751s; TotalTimePerSample = 0.58741ms; SamplesPerSecond = 1702
- Epoch[ 3 of 4]-Minibatch[ 21- 30 of 160]: * 5120; CrossEntropyWithSoftmax = 1.06359730; EvalErrorPrediction = 0.32871094; TotalTime = 2.41305s; TotalTimePerSample = 0.47130ms; SamplesPerSecond = 2121
- Epoch[ 3 of 4]-Minibatch[ 31- 40 of 160]: * 5120; CrossEntropyWithSoftmax = 1.09275322; EvalErrorPrediction = 0.34023437; TotalTime = 2.38985s; TotalTimePerSample = 0.46677ms; SamplesPerSecond = 2142
- Epoch[ 3 of 4]-Minibatch[ 41- 50 of 160]: * 5120; CrossEntropyWithSoftmax = 1.06271973; EvalErrorPrediction = 0.32773438; TotalTime = 2.45865s; TotalTimePerSample = 0.48021ms; SamplesPerSecond = 2082
- Epoch[ 3 of 4]-Minibatch[ 51- 60 of 160]: * 5120; CrossEntropyWithSoftmax = 1.05469666; EvalErrorPrediction = 0.33300781; TotalTime = 2.79982s; TotalTimePerSample = 0.54684ms; SamplesPerSecond = 1828
- Epoch[ 3 of 4]-Minibatch[ 61- 70 of 160]: * 5120; CrossEntropyWithSoftmax = 1.09982071; EvalErrorPrediction = 0.33613281; TotalTime = 2.28760s; TotalTimePerSample = 0.44680ms; SamplesPerSecond = 2238
- Epoch[ 3 of 4]-Minibatch[ 71- 80 of 160]: * 5120; CrossEntropyWithSoftmax = 1.08198242; EvalErrorPrediction = 0.32695313; TotalTime = 2.27461s; TotalTimePerSample = 0.44426ms; SamplesPerSecond = 2250
- Epoch[ 3 of 4]-Minibatch[ 81- 90 of 160]: * 5120; CrossEntropyWithSoftmax = 1.04190979; EvalErrorPrediction = 0.32617188; TotalTime = 2.41349s; TotalTimePerSample = 0.47138ms; SamplesPerSecond = 2121
- Epoch[ 3 of 4]-Minibatch[ 91- 100 of 160]: * 5120; CrossEntropyWithSoftmax = 1.05153503; EvalErrorPrediction = 0.32187500; TotalTime = 2.94621s; TotalTimePerSample = 0.57543ms; SamplesPerSecond = 1737
- Epoch[ 3 of 4]-Minibatch[ 101- 110 of 160]: * 5120; CrossEntropyWithSoftmax = 1.04438324; EvalErrorPrediction = 0.32597656; TotalTime = 2.49370s; TotalTimePerSample = 0.48705ms; SamplesPerSecond = 2053
- Epoch[ 3 of 4]-Minibatch[ 111- 120 of 160]: * 5120; CrossEntropyWithSoftmax = 1.09297791; EvalErrorPrediction = 0.33476563; TotalTime = 2.44252s; TotalTimePerSample = 0.47705ms; SamplesPerSecond = 2096
- Epoch[ 3 of 4]-Minibatch[ 121- 130 of 160]: * 5120; CrossEntropyWithSoftmax = 1.11578674; EvalErrorPrediction = 0.33554688; TotalTime = 2.41458s; TotalTimePerSample = 0.47160ms; SamplesPerSecond = 2120
- Epoch[ 3 of 4]-Minibatch[ 131- 140 of 160]: * 5120; CrossEntropyWithSoftmax = 1.06495667; EvalErrorPrediction = 0.32539062; TotalTime = 2.80408s; TotalTimePerSample = 0.54767ms; SamplesPerSecond = 1825
- Epoch[ 3 of 4]-Minibatch[ 141- 150 of 160]: * 5120; CrossEntropyWithSoftmax = 1.06517181; EvalErrorPrediction = 0.33945313; TotalTime = 2.51803s; TotalTimePerSample = 0.49180ms; SamplesPerSecond = 2033
- Epoch[ 3 of 4]-Minibatch[ 151- 160 of 160]: * 5120; CrossEntropyWithSoftmax = 1.05488586; EvalErrorPrediction = 0.32402344; TotalTime = 2.71141s; TotalTimePerSample = 0.52957ms; SamplesPerSecond = 1888
-Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.0745375; EvalErrorPrediction = 0.33140871; learningRatePerSample = 0.003125000047; EpochTime=41.133271
+ Epoch[ 3 of 4]-Minibatch[ 1- 10 of 160]: * 5120; ce = 1.12369976; err = 0.34121094; TotalTime = 2.71170s; TotalTimePerSample = 0.52963ms; SamplesPerSecond = 1888
+ Epoch[ 3 of 4]-Minibatch[ 11- 20 of 160]: * 5120; ce = 1.08172264; err = 0.33535156; TotalTime = 3.00751s; TotalTimePerSample = 0.58741ms; SamplesPerSecond = 1702
+ Epoch[ 3 of 4]-Minibatch[ 21- 30 of 160]: * 5120; ce = 1.06359730; err = 0.32871094; TotalTime = 2.41305s; TotalTimePerSample = 0.47130ms; SamplesPerSecond = 2121
+ Epoch[ 3 of 4]-Minibatch[ 31- 40 of 160]: * 5120; ce = 1.09275322; err = 0.34023437; TotalTime = 2.38985s; TotalTimePerSample = 0.46677ms; SamplesPerSecond = 2142
+ Epoch[ 3 of 4]-Minibatch[ 41- 50 of 160]: * 5120; ce = 1.06271973; err = 0.32773438; TotalTime = 2.45865s; TotalTimePerSample = 0.48021ms; SamplesPerSecond = 2082
+ Epoch[ 3 of 4]-Minibatch[ 51- 60 of 160]: * 5120; ce = 1.05469666; err = 0.33300781; TotalTime = 2.79982s; TotalTimePerSample = 0.54684ms; SamplesPerSecond = 1828
+ Epoch[ 3 of 4]-Minibatch[ 61- 70 of 160]: * 5120; ce = 1.09982071; err = 0.33613281; TotalTime = 2.28760s; TotalTimePerSample = 0.44680ms; SamplesPerSecond = 2238
+ Epoch[ 3 of 4]-Minibatch[ 71- 80 of 160]: * 5120; ce = 1.08198242; err = 0.32695313; TotalTime = 2.27461s; TotalTimePerSample = 0.44426ms; SamplesPerSecond = 2250
+ Epoch[ 3 of 4]-Minibatch[ 81- 90 of 160]: * 5120; ce = 1.04190979; err = 0.32617188; TotalTime = 2.41349s; TotalTimePerSample = 0.47138ms; SamplesPerSecond = 2121
+ Epoch[ 3 of 4]-Minibatch[ 91- 100 of 160]: * 5120; ce = 1.05153503; err = 0.32187500; TotalTime = 2.94621s; TotalTimePerSample = 0.57543ms; SamplesPerSecond = 1737
+ Epoch[ 3 of 4]-Minibatch[ 101- 110 of 160]: * 5120; ce = 1.04438324; err = 0.32597656; TotalTime = 2.49370s; TotalTimePerSample = 0.48705ms; SamplesPerSecond = 2053
+ Epoch[ 3 of 4]-Minibatch[ 111- 120 of 160]: * 5120; ce = 1.09297791; err = 0.33476563; TotalTime = 2.44252s; TotalTimePerSample = 0.47705ms; SamplesPerSecond = 2096
+ Epoch[ 3 of 4]-Minibatch[ 121- 130 of 160]: * 5120; ce = 1.11578674; err = 0.33554688; TotalTime = 2.41458s; TotalTimePerSample = 0.47160ms; SamplesPerSecond = 2120
+ Epoch[ 3 of 4]-Minibatch[ 131- 140 of 160]: * 5120; ce = 1.06495667; err = 0.32539062; TotalTime = 2.80408s; TotalTimePerSample = 0.54767ms; SamplesPerSecond = 1825
+ Epoch[ 3 of 4]-Minibatch[ 141- 150 of 160]: * 5120; ce = 1.06517181; err = 0.33945313; TotalTime = 2.51803s; TotalTimePerSample = 0.49180ms; SamplesPerSecond = 2033
+ Epoch[ 3 of 4]-Minibatch[ 151- 160 of 160]: * 5120; ce = 1.05488586; err = 0.32402344; TotalTime = 2.71141s; TotalTimePerSample = 0.52957ms; SamplesPerSecond = 1888
+Finished Epoch[ 3 of 4]: [Training] ce = 1.0745375; err = 0.33140871; learningRatePerSample = 0.003125000047; EpochTime=41.133271
Starting Epoch 4: learning rate per sample = 0.003125 effective momentum = 0.810210
minibatchiterator: epoch 3: frames [245760..327680] (first utterance at frame 245760), data subset 0 of 1, with 1 datapasses
Starting minibatch loop.
- Epoch[ 4 of 4]-Minibatch[ 1- 10 of 160]: * 5120; CrossEntropyWithSoftmax = 1.03678217; EvalErrorPrediction = 0.32226563; TotalTime = 2.66643s; TotalTimePerSample = 0.52079ms; SamplesPerSecond = 1920
- Epoch[ 4 of 4]-Minibatch[ 11- 20 of 160]: * 4926; CrossEntropyWithSoftmax = 1.04455462; EvalErrorPrediction = 0.32115307; TotalTime = 3.59836s; TotalTimePerSample = 0.73048ms; SamplesPerSecond = 1368
- Epoch[ 4 of 4]-Minibatch[ 21- 30 of 160]: * 5120; CrossEntropyWithSoftmax = 1.01124554; EvalErrorPrediction = 0.31757812; TotalTime = 2.59429s; TotalTimePerSample = 0.50670ms; SamplesPerSecond = 1973
- Epoch[ 4 of 4]-Minibatch[ 31- 40 of 160]: * 5120; CrossEntropyWithSoftmax = 1.00737896; EvalErrorPrediction = 0.31542969; TotalTime = 2.41720s; TotalTimePerSample = 0.47211ms; SamplesPerSecond = 2118
- Epoch[ 4 of 4]-Minibatch[ 41- 50 of 160]: * 5120; CrossEntropyWithSoftmax = 1.00528069; EvalErrorPrediction = 0.31347656; TotalTime = 2.48426s; TotalTimePerSample = 0.48521ms; SamplesPerSecond = 2060
- Epoch[ 4 of 4]-Minibatch[ 51- 60 of 160]: * 5120; CrossEntropyWithSoftmax = 0.99712868; EvalErrorPrediction = 0.30898437; TotalTime = 3.63720s; TotalTimePerSample = 0.71039ms; SamplesPerSecond = 1407
- Epoch[ 4 of 4]-Minibatch[ 61- 70 of 160]: * 5120; CrossEntropyWithSoftmax = 0.99358406; EvalErrorPrediction = 0.30605469; TotalTime = 4.19525s; TotalTimePerSample = 0.81938ms; SamplesPerSecond = 1220
- Epoch[ 4 of 4]-Minibatch[ 71- 80 of 160]: * 5120; CrossEntropyWithSoftmax = 1.02139435; EvalErrorPrediction = 0.31445313; TotalTime = 2.82677s; TotalTimePerSample = 0.55210ms; SamplesPerSecond = 1811
- Epoch[ 4 of 4]-Minibatch[ 81- 90 of 160]: * 5120; CrossEntropyWithSoftmax = 1.00482025; EvalErrorPrediction = 0.31542969; TotalTime = 2.87430s; TotalTimePerSample = 0.56139ms; SamplesPerSecond = 1781
- Epoch[ 4 of 4]-Minibatch[ 91- 100 of 160]: * 5120; CrossEntropyWithSoftmax = 0.97015762; EvalErrorPrediction = 0.31035156; TotalTime = 4.26995s; TotalTimePerSample = 0.83397ms; SamplesPerSecond = 1199
- Epoch[ 4 of 4]-Minibatch[ 101- 110 of 160]: * 5120; CrossEntropyWithSoftmax = 0.99198990; EvalErrorPrediction = 0.30410156; TotalTime = 2.86275s; TotalTimePerSample = 0.55913ms; SamplesPerSecond = 1788
- Epoch[ 4 of 4]-Minibatch[ 111- 120 of 160]: * 5120; CrossEntropyWithSoftmax = 0.99568176; EvalErrorPrediction = 0.30703125; TotalTime = 2.88755s; TotalTimePerSample = 0.56397ms; SamplesPerSecond = 1773
- Epoch[ 4 of 4]-Minibatch[ 121- 130 of 160]: * 5120; CrossEntropyWithSoftmax = 1.01617508; EvalErrorPrediction = 0.31660156; TotalTime = 2.64123s; TotalTimePerSample = 0.51586ms; SamplesPerSecond = 1938
- Epoch[ 4 of 4]-Minibatch[ 131- 140 of 160]: * 5120; CrossEntropyWithSoftmax = 0.98181610; EvalErrorPrediction = 0.30996094; TotalTime = 2.65385s; TotalTimePerSample = 0.51833ms; SamplesPerSecond = 1929
- Epoch[ 4 of 4]-Minibatch[ 141- 150 of 160]: * 5120; CrossEntropyWithSoftmax = 0.94842072; EvalErrorPrediction = 0.30253906; TotalTime = 2.43389s; TotalTimePerSample = 0.47537ms; SamplesPerSecond = 2103
- Epoch[ 4 of 4]-Minibatch[ 151- 160 of 160]: * 5120; CrossEntropyWithSoftmax = 0.96573181; EvalErrorPrediction = 0.30156250; TotalTime = 2.77351s; TotalTimePerSample = 0.54170ms; SamplesPerSecond = 1846
-Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 0.9992947; EvalErrorPrediction = 0.31165773; learningRatePerSample = 0.003125000047; EpochTime=48.032169
+ Epoch[ 4 of 4]-Minibatch[ 1- 10 of 160]: * 5120; ce = 1.03678217; err = 0.32226563; TotalTime = 2.66643s; TotalTimePerSample = 0.52079ms; SamplesPerSecond = 1920
+ Epoch[ 4 of 4]-Minibatch[ 11- 20 of 160]: * 4926; ce = 1.04455462; err = 0.32115307; TotalTime = 3.59836s; TotalTimePerSample = 0.73048ms; SamplesPerSecond = 1368
+ Epoch[ 4 of 4]-Minibatch[ 21- 30 of 160]: * 5120; ce = 1.01124554; err = 0.31757812; TotalTime = 2.59429s; TotalTimePerSample = 0.50670ms; SamplesPerSecond = 1973
+ Epoch[ 4 of 4]-Minibatch[ 31- 40 of 160]: * 5120; ce = 1.00737896; err = 0.31542969; TotalTime = 2.41720s; TotalTimePerSample = 0.47211ms; SamplesPerSecond = 2118
+ Epoch[ 4 of 4]-Minibatch[ 41- 50 of 160]: * 5120; ce = 1.00528069; err = 0.31347656; TotalTime = 2.48426s; TotalTimePerSample = 0.48521ms; SamplesPerSecond = 2060
+ Epoch[ 4 of 4]-Minibatch[ 51- 60 of 160]: * 5120; ce = 0.99712868; err = 0.30898437; TotalTime = 3.63720s; TotalTimePerSample = 0.71039ms; SamplesPerSecond = 1407
+ Epoch[ 4 of 4]-Minibatch[ 61- 70 of 160]: * 5120; ce = 0.99358406; err = 0.30605469; TotalTime = 4.19525s; TotalTimePerSample = 0.81938ms; SamplesPerSecond = 1220
+ Epoch[ 4 of 4]-Minibatch[ 71- 80 of 160]: * 5120; ce = 1.02139435; err = 0.31445313; TotalTime = 2.82677s; TotalTimePerSample = 0.55210ms; SamplesPerSecond = 1811
+ Epoch[ 4 of 4]-Minibatch[ 81- 90 of 160]: * 5120; ce = 1.00482025; err = 0.31542969; TotalTime = 2.87430s; TotalTimePerSample = 0.56139ms; SamplesPerSecond = 1781
+ Epoch[ 4 of 4]-Minibatch[ 91- 100 of 160]: * 5120; ce = 0.97015762; err = 0.31035156; TotalTime = 4.26995s; TotalTimePerSample = 0.83397ms; SamplesPerSecond = 1199
+ Epoch[ 4 of 4]-Minibatch[ 101- 110 of 160]: * 5120; ce = 0.99198990; err = 0.30410156; TotalTime = 2.86275s; TotalTimePerSample = 0.55913ms; SamplesPerSecond = 1788
+ Epoch[ 4 of 4]-Minibatch[ 111- 120 of 160]: * 5120; ce = 0.99568176; err = 0.30703125; TotalTime = 2.88755s; TotalTimePerSample = 0.56397ms; SamplesPerSecond = 1773
+ Epoch[ 4 of 4]-Minibatch[ 121- 130 of 160]: * 5120; ce = 1.01617508; err = 0.31660156; TotalTime = 2.64123s; TotalTimePerSample = 0.51586ms; SamplesPerSecond = 1938
+ Epoch[ 4 of 4]-Minibatch[ 131- 140 of 160]: * 5120; ce = 0.98181610; err = 0.30996094; TotalTime = 2.65385s; TotalTimePerSample = 0.51833ms; SamplesPerSecond = 1929
+ Epoch[ 4 of 4]-Minibatch[ 141- 150 of 160]: * 5120; ce = 0.94842072; err = 0.30253906; TotalTime = 2.43389s; TotalTimePerSample = 0.47537ms; SamplesPerSecond = 2103
+ Epoch[ 4 of 4]-Minibatch[ 151- 160 of 160]: * 5120; ce = 0.96573181; err = 0.30156250; TotalTime = 2.77351s; TotalTimePerSample = 0.54170ms; SamplesPerSecond = 1846
+Finished Epoch[ 4 of 4]: [Training] ce = 0.9992947; err = 0.31165773; learningRatePerSample = 0.003125000047; EpochTime=48.032169
 CNTKCommandTrainEnd: speechTrain
 __COMPLETED__
diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.windows.gpu.txt
index d38b46c9c..90ba7d79d 100644
--- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/baseline.windows.gpu.txt
@@ -1,4 +1,4 @@
-=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/cntk_dpt.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/cntk_dpt.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true ------------------------------------------------------------------- Build info: @@ -16,31 +16,31 @@ Build info: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ ------------------------------------------------------------------- Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -05/03/2016 14:25:58: ------------------------------------------------------------------- -05/03/2016 14:25:58: Build info: +05/03/2016 14:22:23: ------------------------------------------------------------------- +05/03/2016 14:22:23: Build info: -05/03/2016 14:25:58: Built time: May 3 2016 13:23:06 -05/03/2016 14:25:58: Last modified date: Mon Apr 18 00:00:12 2016 -05/03/2016 14:25:58: Build type: Release -05/03/2016 14:25:58: Build target: GPU -05/03/2016 14:25:58: With 1bit-SGD: no -05/03/2016 14:25:58: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -05/03/2016 14:25:58: CUB_PATH: C:\src\cub-1.4.1 -05/03/2016 14:25:58: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -05/03/2016 14:25:58: Build Branch: HEAD -05/03/2016 14:25:58: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -05/03/2016 14:25:58: Built by svcphil on LIANA-09-w -05/03/2016 14:25:58: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -05/03/2016 14:25:58: ------------------------------------------------------------------- +05/03/2016 14:22:23: Built time: May 3 2016 13:23:06 +05/03/2016 14:22:23: Last modified date: Mon Apr 18 00:00:12 2016 +05/03/2016 14:22:23: Build type: Release +05/03/2016 14:22:23: Build target: GPU +05/03/2016 14:22:23: With 1bit-SGD: no +05/03/2016 14:22:23: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +05/03/2016 14:22:23: CUB_PATH: C:\src\cub-1.4.1 +05/03/2016 14:22:23: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +05/03/2016 14:22:23: Build Branch: HEAD +05/03/2016 14:22:23: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +05/03/2016 14:22:23: Built by svcphil on LIANA-09-w +05/03/2016 14:22:23: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +05/03/2016 14:22:23: ------------------------------------------------------------------- -05/03/2016 14:25:58: 
Running on cntk-muc02 at 2016/05/03 14:25:58 -05/03/2016 14:25:58: Command line: -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/cntk_dpt.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +05/03/2016 14:22:23: Running on cntk-muc02 at 2016/05/03 14:22:23 +05/03/2016 14:22:23: Command line: +C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/cntk_dpt.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true -05/03/2016 14:25:58: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:25:58: precision = "float" +05/03/2016 14:22:23: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:22:23: precision = "float" deviceId = $DeviceId$ command = dptPre1:addLayer2:dptPre2:addLayer3:speechTrain ndlMacros = "$ConfigDir$/macros.txt" @@ -127,19 +127,17 @@ reader = [ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 14:25:58: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:22:23: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:25:58: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:25:58: precision = "float" +05/03/2016 14:22:23: 
>>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:22:23: precision = "float" deviceId = 0 command = dptPre1:addLayer2:dptPre2:addLayer3:speechTrain ndlMacros = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/macros.txt" @@ -158,7 +156,7 @@ SGD = [ ] dptPre1 = [ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" NDLNetworkBuilder = [ networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/dnn_1layer.txt" ] @@ -167,13 +165,13 @@ addLayer2 = [ action = "edit" currLayer = 1 newLayer = 2 - currModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" - newModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0" + currModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" + newModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0" editPath = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/add_layer.mel" ] dptPre2 = [ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" NDLNetworkBuilder = [ networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/dnn_1layer.txt" ] @@ -182,13 +180,13 @@ addLayer3 = [ action = "edit" currLayer = 2 newLayer = 3 - currModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" - newModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0" + currModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" + newModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0" editPath = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/add_layer.mel" ] speechTrain = [ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech" deviceId = 0 traceLevel = 1 
NDLNetworkBuilder = [ @@ -226,24 +224,22 @@ reader = [ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu DeviceId=0 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 14:25:58: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:22:23: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:25:58: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:22:23: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk_dpt.cntk:addLayer2=[ action = "edit" currLayer = 1 newLayer = 2 - currModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" - newModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0" + currModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" + newModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0" editPath = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/add_layer.mel" ] @@ -251,8 +247,8 @@ configparameters: cntk_dpt.cntk:addLayer3=[ action = "edit" currLayer = 2 newLayer = 3 - currModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" - newModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0" + currModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" + newModel = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0" editPath = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/add_layer.mel" ] @@ -263,7 +259,7 @@ configparameters: cntk_dpt.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W configparameters: cntk_dpt.cntk:deviceId=0 configparameters: cntk_dpt.cntk:dptPre1=[ action = "train" - modelPath = 
"C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech" NDLNetworkBuilder = [ networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/dnn_1layer.txt" ] @@ -271,7 +267,7 @@ configparameters: cntk_dpt.cntk:dptPre1=[ configparameters: cntk_dpt.cntk:dptPre2=[ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech" NDLNetworkBuilder = [ networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/dnn_1layer.txt" ] @@ -281,7 +277,7 @@ configparameters: cntk_dpt.cntk:globalInvStdPath=GlobalStats/var.363 configparameters: cntk_dpt.cntk:globalMeanPath=GlobalStats/mean.363 configparameters: cntk_dpt.cntk:globalPriorPath=GlobalStats/prior.132 configparameters: cntk_dpt.cntk:ndlMacros=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN\DiscriminativePreTraining/macros.txt -configparameters: cntk_dpt.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu +configparameters: cntk_dpt.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu configparameters: cntk_dpt.cntk:precision=float configparameters: cntk_dpt.cntk:reader=[ readerType = "HTKMLFReader" @@ -300,9 +296,9 @@ configparameters: cntk_dpt.cntk:reader=[ labelDim = 132 labelType = "category" ] -] [readerType=ExperimentalHTKMLFReader] [prefetch=true] +] -configparameters: cntk_dpt.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu +configparameters: cntk_dpt.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu configparameters: cntk_dpt.cntk:SGD=[ epochSize = 81920 minibatchSize = 256 @@ -315,7 +311,7 @@ configparameters: cntk_dpt.cntk:SGD=[ configparameters: cntk_dpt.cntk:speechTrain=[ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech" deviceId = 0 traceLevel = 1 NDLNetworkBuilder = [ @@ -337,34 +333,33 @@ configparameters: cntk_dpt.cntk:speechTrain=[ configparameters: cntk_dpt.cntk:timestamping=true configparameters: cntk_dpt.cntk:traceLevel=1 -05/03/2016 14:25:58: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:25:58: Commands: dptPre1 addLayer2 dptPre2 addLayer3 speechTrain -05/03/2016 14:25:58: Precision = "float" -05/03/2016 14:25:58: CNTKModelPath: 
C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech -05/03/2016 14:25:58: CNTKCommandTrainInfo: dptPre1 : 2 -05/03/2016 14:25:58: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech -05/03/2016 14:25:58: CNTKCommandTrainInfo: dptPre2 : 2 -05/03/2016 14:25:58: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech -05/03/2016 14:25:58: CNTKCommandTrainInfo: speechTrain : 4 -05/03/2016 14:25:58: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 8 +05/03/2016 14:22:23: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:22:23: Commands: dptPre1 addLayer2 dptPre2 addLayer3 speechTrain +05/03/2016 14:22:23: Precision = "float" +05/03/2016 14:22:23: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech +05/03/2016 14:22:23: CNTKCommandTrainInfo: dptPre1 : 2 +05/03/2016 14:22:23: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech +05/03/2016 14:22:23: CNTKCommandTrainInfo: dptPre2 : 2 +05/03/2016 14:22:23: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech +05/03/2016 14:22:23: CNTKCommandTrainInfo: speechTrain : 4 +05/03/2016 14:22:23: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 8 -05/03/2016 14:25:58: ############################################################################## -05/03/2016 14:25:58: # # -05/03/2016 14:25:58: # Action "train" # -05/03/2016 14:25:58: # # -05/03/2016 14:25:58: ############################################################################## +05/03/2016 14:22:23: ############################################################################## +05/03/2016 14:22:23: # # +05/03/2016 14:22:23: # Action "train" # +05/03/2016 14:22:23: # # +05/03/2016 14:22:23: ############################################################################## -05/03/2016 14:25:58: CNTKCommandTrainBegin: dptPre1 +05/03/2016 14:22:23: CNTKCommandTrainBegin: dptPre1 NDLBuilder Using GPU 0 -Reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.scp ... 948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... 
total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:25:58: Creating virgin network. +05/03/2016 14:22:24: Creating virgin network. Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 Post-processing network... @@ -407,14 +402,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:25:59: Created model with 19 nodes on GPU 0. +05/03/2016 14:22:24: Created model with 19 nodes on GPU 0. -05/03/2016 14:25:59: Training criterion node(s): -05/03/2016 14:25:59: ce = CrossEntropyWithSoftmax +05/03/2016 14:22:24: Training criterion node(s): +05/03/2016 14:22:24: ce = CrossEntropyWithSoftmax -05/03/2016 14:25:59: Evaluation criterion node(s): +05/03/2016 14:22:24: Evaluation criterion node(s): -05/03/2016 14:25:59: err = ErrorPrediction +05/03/2016 14:22:24: err = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -422,117 +417,120 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: 0000000000000000: {[err Gradient[1]] [featNorm Gradient[363 x *]] [features Gradient[363 x *]] [globalInvStd Gradient[363 x 1]] [globalMean Gradient[363 x 1]] [globalPrior Gradient[132 x 1]] [labels Gradient[132 x *]] [logPrior Gradient[132 x 1]] [scaledLogLikelihood Gradient[132 x 1 x *]] } -000000220FCC7720: {[features Value[363 x *]] } -000000220FCC8080: {[labels Value[132 x *]] } -000000220FCC8440: {[globalMean Value[363 x 1]] } -000000220FCC86C0: {[globalInvStd Value[363 x 1]] } -000000220FCC8760: {[globalPrior Value[132 x 1]] } -00000022282B7530: {[OL.b Value[132 x 1]] } -00000022282B77B0: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *]] } -00000022282B7AD0: {[scaledLogLikelihood Value[132 x 1 x *]] } -00000022282B7B70: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *]] } -00000022282B7F30: {[ce Gradient[1]] } -00000022282B7FD0: {[HL1.t Gradient[512 x *]] [HL1.y Value[512 x 1 x *]] } -00000022282B8110: {[OL.W Value[132 x 512]] } -00000022282B82F0: {[HL1.t Value[512 x *]] } -00000022282B8390: {[HL1.z Gradient[512 x 1 x *]] [OL.t Value[132 x 1 x *]] } -00000022282B8890: {[err Value[1]] } -00000022282B8930: {[logPrior Value[132 x 1]] } -00000022282B89D0: {[featNorm Value[363 x *]] } -00000022282B8BB0: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *]] [OL.z Gradient[132 x 1 x *]] } -00000022282B8C50: {[OL.t Gradient[132 x 1 x *]] } -00000022282B8CF0: {[OL.b Gradient[132 x 1]] } -00000022282B8E30: {[HL1.W Value[512 x 363]] } -00000022282B90B0: {[HL1.b Value[512 x 1]] } -00000022282B9290: {[ce Value[1]] } +000000EB9F9B00F0: {[labels Value[132 x *]] } +000000EB9F9B0870: {[globalMean Value[363 x 1]] } +000000EB9F9B1310: {[globalInvStd Value[363 x 1]] } +000000EB9F9B1810: {[features Value[363 x *]] } +000000EBB77CD290: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *]] } +000000EBB77CD470: {[err Value[1]] } +000000EBB77CD510: {[HL1.t Value[512 x *]] } +000000EBB77CD5B0: {[ce Value[1]] } +000000EBB77CD650: {[HL1.t Gradient[512 x *]] [HL1.y Value[512 x 1 x *]] } +000000EBB77CD6F0: {[HL1.z Gradient[512 x 1 x *]] [OL.t Value[132 x 1 x *]] } 
+000000EBB77CDAB0: {[HL1.b Value[512 x 1]] } +000000EBB77CE0F0: {[logPrior Value[132 x 1]] } +000000EBB77CE190: {[featNorm Value[363 x *]] } +000000EBB77CE230: {[OL.b Value[132 x 1]] } +000000EBB77CE2D0: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *]] } +000000EBB77CE690: {[globalPrior Value[132 x 1]] } +000000EBB77CEEB0: {[scaledLogLikelihood Value[132 x 1 x *]] } +000000EBB77CF090: {[HL1.W Value[512 x 363]] } +000000EBB77CF130: {[OL.W Value[132 x 512]] } +000000EBB9FFCF00: {[OL.b Gradient[132 x 1]] } +000000EBB9FFD680: {[ce Gradient[1]] } +000000EBB9FFD9A0: {[OL.t Gradient[132 x 1 x *]] } +000000EBB9FFE260: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *]] [OL.z Gradient[132 x 1 x *]] } -05/03/2016 14:25:59: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 14:22:24: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 14:25:59: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +05/03/2016 14:22:24: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:25:59: Starting minibatch loop. -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 1- 10, 3.13%]: ce = 3.89978256 * 2560; err = 0.84375000 * 2560; time = 0.3253s; samplesPerSecond = 7870.4 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 2.96755676 * 2560; err = 0.72031250 * 2560; time = 0.0279s; samplesPerSecond = 91628.2 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 2.55723495 * 2560; err = 0.65859375 * 2560; time = 0.0281s; samplesPerSecond = 91100.0 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 2.29642715 * 2560; err = 0.61992187 * 2560; time = 0.0281s; samplesPerSecond = 90976.9 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 41- 50, 15.63%]: ce = 2.02396469 * 2560; err = 0.55117187 * 2560; time = 0.0283s; samplesPerSecond = 90545.8 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.87309265 * 2560; err = 0.51484375 * 2560; time = 0.0283s; samplesPerSecond = 90510.5 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.78157196 * 2560; err = 0.50507813 * 2560; time = 0.0281s; samplesPerSecond = 91135.6 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.75391235 * 2560; err = 0.50781250 * 2560; time = 0.0281s; samplesPerSecond = 91015.7 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 81- 90, 28.13%]: ce = 1.66460266 * 2560; err = 0.45742187 * 2560; time = 0.0281s; samplesPerSecond = 91116.2 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.62184296 * 2560; err = 0.47968750 * 2560; time = 0.0282s; samplesPerSecond = 90754.4 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.65328217 * 2560; err = 0.47265625 * 2560; time = 0.0282s; samplesPerSecond = 90828.5 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.50686951 * 2560; err = 0.44921875 * 2560; time = 0.0282s; samplesPerSecond = 90854.2 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 121- 130, 40.63%]: ce = 1.46723938 * 2560; err = 0.42304687 * 2560; time = 0.0281s; samplesPerSecond = 91048.1 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.49163513 * 2560; err = 
0.44140625 * 2560; time = 0.0281s; samplesPerSecond = 91028.7 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.46437683 * 2560; err = 0.43398437 * 2560; time = 0.0282s; samplesPerSecond = 90938.2 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.43047485 * 2560; err = 0.43867187 * 2560; time = 0.0283s; samplesPerSecond = 90587.4 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 161- 170, 53.13%]: ce = 1.42106018 * 2560; err = 0.41992188 * 2560; time = 0.0281s; samplesPerSecond = 91093.5 -05/03/2016 14:25:59: Epoch[ 1 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.46538086 * 2560; err = 0.42421875 * 2560; time = 0.0282s; samplesPerSecond = 90912.3 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.47427673 * 2560; err = 0.44062500 * 2560; time = 0.0282s; samplesPerSecond = 90796.2 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.42847290 * 2560; err = 0.44023438 * 2560; time = 0.0281s; samplesPerSecond = 91207.1 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 201- 210, 65.63%]: ce = 1.34078369 * 2560; err = 0.41171875 * 2560; time = 0.0281s; samplesPerSecond = 91090.2 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.39474487 * 2560; err = 0.42734375 * 2560; time = 0.0285s; samplesPerSecond = 89843.5 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 221- 230, 71.88%]: ce = 1.40151062 * 2560; err = 0.41250000 * 2560; time = 0.0283s; samplesPerSecond = 90561.8 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.39345703 * 2560; err = 0.42734375 * 2560; time = 0.0281s; samplesPerSecond = 91129.1 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 241- 250, 78.13%]: ce = 1.32485046 * 2560; err = 0.40156250 * 2560; time = 0.0284s; samplesPerSecond = 90071.1 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.27032471 * 2560; err = 0.39765625 * 2560; time = 0.0281s; samplesPerSecond = 91044.9 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.32375488 * 2560; err = 0.39257813 * 2560; time = 0.0281s; samplesPerSecond = 90970.5 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.25393982 * 2560; err = 0.38320312 * 2560; time = 0.0282s; samplesPerSecond = 90876.8 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 281- 290, 90.63%]: ce = 1.23377075 * 2560; err = 0.36953125 * 2560; time = 0.0282s; samplesPerSecond = 90825.2 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.20861511 * 2560; err = 0.35976562 * 2560; time = 0.0282s; samplesPerSecond = 90902.6 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.23675232 * 2560; err = 0.36757812 * 2560; time = 0.0281s; samplesPerSecond = 91070.8 -05/03/2016 14:26:00: Epoch[ 1 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.22960205 * 2560; err = 0.37460938 * 2560; time = 0.0282s; samplesPerSecond = 90728.7 -05/03/2016 14:26:00: Finished Epoch[ 1 of 2]: [Training] ce = 1.65172386 * 81920; err = 0.46774902 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=1.26071s -05/03/2016 14:26:00: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech.1' +05/03/2016 14:22:25: Starting minibatch loop. 
+05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 1- 10, 3.13%]: ce = 3.89978256 * 2560; err = 0.84375000 * 2560; time = 0.2017s; samplesPerSecond = 12691.6 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 2.96755676 * 2560; err = 0.72031250 * 2560; time = 0.0285s; samplesPerSecond = 89941.3 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 2.55723495 * 2560; err = 0.65859375 * 2560; time = 0.0288s; samplesPerSecond = 88885.8 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 2.29642715 * 2560; err = 0.61992187 * 2560; time = 0.0286s; samplesPerSecond = 89588.8 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 41- 50, 15.63%]: ce = 2.02396469 * 2560; err = 0.55117187 * 2560; time = 0.0286s; samplesPerSecond = 89422.9 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.87309265 * 2560; err = 0.51484375 * 2560; time = 0.0287s; samplesPerSecond = 89338.7 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.78157196 * 2560; err = 0.50507813 * 2560; time = 0.0284s; samplesPerSecond = 90150.4 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.75391235 * 2560; err = 0.50781250 * 2560; time = 0.0277s; samplesPerSecond = 92335.4 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 81- 90, 28.13%]: ce = 1.66460266 * 2560; err = 0.45742187 * 2560; time = 0.0280s; samplesPerSecond = 91507.0 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.62184296 * 2560; err = 0.47968750 * 2560; time = 0.0286s; samplesPerSecond = 89554.3 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.65328217 * 2560; err = 0.47265625 * 2560; time = 0.0287s; samplesPerSecond = 89161.3 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.50686951 * 2560; err = 0.44921875 * 2560; time = 0.0278s; samplesPerSecond = 92215.7 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 121- 130, 40.63%]: ce = 1.46723938 * 2560; err = 0.42304687 * 2560; time = 0.0276s; samplesPerSecond = 92649.6 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.49163513 * 2560; err = 0.44140625 * 2560; time = 0.0273s; samplesPerSecond = 93896.7 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.46437683 * 2560; err = 0.43398437 * 2560; time = 0.0270s; samplesPerSecond = 94888.6 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.43047485 * 2560; err = 0.43867187 * 2560; time = 0.0272s; samplesPerSecond = 93993.2 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 161- 170, 53.13%]: ce = 1.42106018 * 2560; err = 0.41992188 * 2560; time = 0.0277s; samplesPerSecond = 92565.8 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.46538086 * 2560; err = 0.42421875 * 2560; time = 0.0276s; samplesPerSecond = 92673.0 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.47427673 * 2560; err = 0.44062500 * 2560; time = 0.0276s; samplesPerSecond = 92868.0 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.42847290 * 2560; err = 0.44023438 * 2560; time = 0.0276s; samplesPerSecond = 92619.4 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 201- 210, 65.63%]: ce = 1.34078369 * 2560; err = 0.41171875 * 2560; time = 0.0276s; samplesPerSecond = 92636.1 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.39474487 * 2560; err = 0.42734375 * 2560; time = 0.0276s; samplesPerSecond = 92720.0 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 221- 230, 71.88%]: ce = 
1.40151062 * 2560; err = 0.41250000 * 2560; time = 0.0276s; samplesPerSecond = 92733.5 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.39345703 * 2560; err = 0.42734375 * 2560; time = 0.0281s; samplesPerSecond = 90999.6 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 241- 250, 78.13%]: ce = 1.32485046 * 2560; err = 0.40156250 * 2560; time = 0.0275s; samplesPerSecond = 93182.4 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.27032471 * 2560; err = 0.39765625 * 2560; time = 0.0275s; samplesPerSecond = 92955.7 +05/03/2016 14:22:25: Epoch[ 1 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.32375488 * 2560; err = 0.39257813 * 2560; time = 0.0276s; samplesPerSecond = 92616.0 +05/03/2016 14:22:26: Epoch[ 1 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.25393982 * 2560; err = 0.38320312 * 2560; time = 0.0276s; samplesPerSecond = 92659.6 +05/03/2016 14:22:26: Epoch[ 1 of 2]-Minibatch[ 281- 290, 90.63%]: ce = 1.23377075 * 2560; err = 0.36953125 * 2560; time = 0.0276s; samplesPerSecond = 92663.0 +05/03/2016 14:22:26: Epoch[ 1 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.20861511 * 2560; err = 0.35976562 * 2560; time = 0.0276s; samplesPerSecond = 92713.3 +05/03/2016 14:22:26: Epoch[ 1 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.23675232 * 2560; err = 0.36757812 * 2560; time = 0.0276s; samplesPerSecond = 92632.8 +05/03/2016 14:22:26: Epoch[ 1 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.22960205 * 2560; err = 0.37460938 * 2560; time = 0.0276s; samplesPerSecond = 92800.7 +05/03/2016 14:22:26: Finished Epoch[ 1 of 2]: [Training] ce = 1.65172386 * 81920; err = 0.46774902 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=1.21837s +05/03/2016 14:22:26: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech.1' -05/03/2016 14:26:00: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +05/03/2016 14:22:26: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses -05/03/2016 14:26:00: Starting minibatch loop. 
-05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 1- 10, 3.13%]: ce = 1.21869726 * 2560; err = 0.36992188 * 2560; time = 0.0280s; samplesPerSecond = 91422.0 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 1.18345690 * 2560; err = 0.36679688 * 2560; time = 0.0268s; samplesPerSecond = 95372.9 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 1.17220421 * 2560; err = 0.35898438 * 2560; time = 0.0273s; samplesPerSecond = 93848.5 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.20035286 * 2560; err = 0.35781250 * 2560; time = 0.0272s; samplesPerSecond = 94169.6 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 41- 50, 15.63%]: ce = 1.19499741 * 2560; err = 0.37460938 * 2560; time = 0.0274s; samplesPerSecond = 93587.8 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.16373482 * 2560; err = 0.34687500 * 2560; time = 0.0274s; samplesPerSecond = 93338.7 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.13869247 * 2560; err = 0.34804687 * 2560; time = 0.0273s; samplesPerSecond = 93766.0 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.19293823 * 2560; err = 0.36992188 * 2560; time = 0.0272s; samplesPerSecond = 93982.9 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 81- 90, 28.13%]: ce = 1.23978348 * 2560; err = 0.37539062 * 2560; time = 0.0272s; samplesPerSecond = 93993.2 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.18622742 * 2560; err = 0.36406250 * 2560; time = 0.0269s; samplesPerSecond = 95096.6 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.16710892 * 2560; err = 0.35703125 * 2560; time = 0.0272s; samplesPerSecond = 94010.5 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.24683685 * 2560; err = 0.38554688 * 2560; time = 0.0273s; samplesPerSecond = 93793.5 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 121- 130, 40.63%]: ce = 1.18601685 * 2560; err = 0.35273437 * 2560; time = 0.0274s; samplesPerSecond = 93529.6 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.21721497 * 2560; err = 0.37617187 * 2560; time = 0.0277s; samplesPerSecond = 92428.8 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.19934692 * 2560; err = 0.36953125 * 2560; time = 0.0281s; samplesPerSecond = 90999.6 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.15099945 * 2560; err = 0.34257813 * 2560; time = 0.0282s; samplesPerSecond = 90844.6 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 161- 170, 53.13%]: ce = 1.14984589 * 2560; err = 0.35703125 * 2560; time = 0.0282s; samplesPerSecond = 90738.3 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.19028320 * 2560; err = 0.35898438 * 2560; time = 0.0281s; samplesPerSecond = 91158.4 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.16434784 * 2560; err = 0.36406250 * 2560; time = 0.0281s; samplesPerSecond = 90960.8 -05/03/2016 14:26:00: Epoch[ 2 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.08853760 * 2560; err = 0.33359375 * 2560; time = 0.0281s; samplesPerSecond = 91070.8 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 201- 210, 65.63%]: ce = 1.15194244 * 2560; err = 0.35039063 * 2560; time = 0.0283s; samplesPerSecond = 90565.0 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.16113434 * 2560; err = 0.35625000 * 2560; time = 0.0281s; samplesPerSecond = 91054.6 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 221- 230, 71.88%]: ce = 
1.18479004 * 2560; err = 0.36757812 * 2560; time = 0.0282s; samplesPerSecond = 90876.8 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.14554138 * 2560; err = 0.34843750 * 2560; time = 0.0281s; samplesPerSecond = 90993.1 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 241- 250, 78.13%]: ce = 1.15263367 * 2560; err = 0.35390625 * 2560; time = 0.0281s; samplesPerSecond = 91103.2 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.08563538 * 2560; err = 0.33437500 * 2560; time = 0.0282s; samplesPerSecond = 90680.5 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.10797424 * 2560; err = 0.34882812 * 2560; time = 0.0282s; samplesPerSecond = 90796.2 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.07031860 * 2560; err = 0.33593750 * 2560; time = 0.0278s; samplesPerSecond = 92169.2 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 281- 290, 90.63%]: ce = 1.09429016 * 2560; err = 0.33476563 * 2560; time = 0.0283s; samplesPerSecond = 90306.2 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.14634094 * 2560; err = 0.35351563 * 2560; time = 0.0280s; samplesPerSecond = 91330.7 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.10476990 * 2560; err = 0.34335938 * 2560; time = 0.0270s; samplesPerSecond = 94899.2 -05/03/2016 14:26:01: Epoch[ 2 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.07355957 * 2560; err = 0.32695313 * 2560; time = 0.0269s; samplesPerSecond = 95213.3 -05/03/2016 14:26:01: Finished Epoch[ 2 of 2]: [Training] ce = 1.16032982 * 81920; err = 0.35574951 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=0.895366s -05/03/2016 14:26:01: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech' -05/03/2016 14:26:01: CNTKCommandTrainEnd: dptPre1 +05/03/2016 14:22:26: Starting minibatch loop. 
+05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 1- 10, 3.13%]: ce = 1.21869726 * 2560; err = 0.36992188 * 2560; time = 0.0283s; samplesPerSecond = 90417.8 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 1.18345690 * 2560; err = 0.36679688 * 2560; time = 0.0273s; samplesPerSecond = 93876.1 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 1.17220421 * 2560; err = 0.35898438 * 2560; time = 0.0276s; samplesPerSecond = 92639.5 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.20035286 * 2560; err = 0.35781250 * 2560; time = 0.0276s; samplesPerSecond = 92699.9 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 41- 50, 15.63%]: ce = 1.19499741 * 2560; err = 0.37460938 * 2560; time = 0.0276s; samplesPerSecond = 92787.2 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.16373482 * 2560; err = 0.34687500 * 2560; time = 0.0276s; samplesPerSecond = 92656.3 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.13869247 * 2560; err = 0.34804687 * 2560; time = 0.0277s; samplesPerSecond = 92552.4 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.19293823 * 2560; err = 0.36992188 * 2560; time = 0.0276s; samplesPerSecond = 92841.1 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 81- 90, 28.13%]: ce = 1.23978348 * 2560; err = 0.37539062 * 2560; time = 0.0276s; samplesPerSecond = 92649.6 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.18622742 * 2560; err = 0.36406250 * 2560; time = 0.0272s; samplesPerSecond = 94114.2 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.16710892 * 2560; err = 0.35703125 * 2560; time = 0.0267s; samplesPerSecond = 95700.9 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.24683685 * 2560; err = 0.38554688 * 2560; time = 0.0266s; samplesPerSecond = 96074.5 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 121- 130, 40.63%]: ce = 1.18601685 * 2560; err = 0.35273437 * 2560; time = 0.0268s; samplesPerSecond = 95419.1 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.21721497 * 2560; err = 0.37617187 * 2560; time = 0.0276s; samplesPerSecond = 92602.6 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.19934692 * 2560; err = 0.36953125 * 2560; time = 0.0277s; samplesPerSecond = 92438.8 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.15099945 * 2560; err = 0.34257813 * 2560; time = 0.0276s; samplesPerSecond = 92730.1 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 161- 170, 53.13%]: ce = 1.14984589 * 2560; err = 0.35703125 * 2560; time = 0.0276s; samplesPerSecond = 92891.6 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.19028320 * 2560; err = 0.35898438 * 2560; time = 0.0276s; samplesPerSecond = 92679.7 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.16434784 * 2560; err = 0.36406250 * 2560; time = 0.0277s; samplesPerSecond = 92572.5 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.08853760 * 2560; err = 0.33359375 * 2560; time = 0.0276s; samplesPerSecond = 92800.7 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 201- 210, 65.63%]: ce = 1.15194244 * 2560; err = 0.35039063 * 2560; time = 0.0276s; samplesPerSecond = 92706.6 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.16113434 * 2560; err = 0.35625000 * 2560; time = 0.0276s; samplesPerSecond = 92797.3 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 221- 230, 71.88%]: ce = 
1.18479004 * 2560; err = 0.36757812 * 2560; time = 0.0277s; samplesPerSecond = 92535.7 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.14554138 * 2560; err = 0.34843750 * 2560; time = 0.0276s; samplesPerSecond = 92639.5 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 241- 250, 78.13%]: ce = 1.15263367 * 2560; err = 0.35390625 * 2560; time = 0.0276s; samplesPerSecond = 92622.7 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.08563538 * 2560; err = 0.33437500 * 2560; time = 0.0276s; samplesPerSecond = 92898.4 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.10797424 * 2560; err = 0.34882812 * 2560; time = 0.0283s; samplesPerSecond = 90481.7 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.07031860 * 2560; err = 0.33593750 * 2560; time = 0.0277s; samplesPerSecond = 92565.8 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 281- 290, 90.63%]: ce = 1.09429016 * 2560; err = 0.33476563 * 2560; time = 0.0276s; samplesPerSecond = 92797.3 +05/03/2016 14:22:26: Epoch[ 2 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.14634094 * 2560; err = 0.35351563 * 2560; time = 0.0276s; samplesPerSecond = 92592.6 +05/03/2016 14:22:27: Epoch[ 2 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.10476990 * 2560; err = 0.34335938 * 2560; time = 0.0276s; samplesPerSecond = 92874.8 +05/03/2016 14:22:27: Epoch[ 2 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.07355957 * 2560; err = 0.32695313 * 2560; time = 0.0276s; samplesPerSecond = 92612.7 +05/03/2016 14:22:27: Finished Epoch[ 2 of 2]: [Training] ce = 1.16032982 * 81920; err = 0.35574951 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=0.884889s +05/03/2016 14:22:27: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre1/cntkSpeech' +05/03/2016 14:22:27: CNTKCommandTrainEnd: dptPre1 -05/03/2016 14:26:01: Action "train" complete. +05/03/2016 14:22:27: Action "train" complete. -05/03/2016 14:26:01: ############################################################################## -05/03/2016 14:26:01: # # -05/03/2016 14:26:01: # Action "edit" # -05/03/2016 14:26:01: # # -05/03/2016 14:26:01: ############################################################################## +05/03/2016 14:22:27: ############################################################################## +05/03/2016 14:22:27: # # +05/03/2016 14:22:27: # Action "edit" # +05/03/2016 14:22:27: # # +05/03/2016 14:22:27: ############################################################################## Post-processing network... @@ -622,26 +620,25 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:26:01: Action "edit" complete. +05/03/2016 14:22:27: Action "edit" complete. 
-05/03/2016 14:26:01: ############################################################################## -05/03/2016 14:26:01: # # -05/03/2016 14:26:01: # Action "train" # -05/03/2016 14:26:01: # # -05/03/2016 14:26:01: ############################################################################## +05/03/2016 14:22:27: ############################################################################## +05/03/2016 14:22:27: # # +05/03/2016 14:22:27: # Action "train" # +05/03/2016 14:22:27: # # +05/03/2016 14:22:27: ############################################################################## -05/03/2016 14:26:01: CNTKCommandTrainBegin: dptPre2 +05/03/2016 14:22:27: CNTKCommandTrainBegin: dptPre2 NDLBuilder Using GPU 0 -Reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.scp ... 948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:26:01: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0'. +05/03/2016 14:22:27: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.0'. Post-processing network... @@ -688,14 +685,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:26:01: Loaded model with 24 nodes on GPU 0. +05/03/2016 14:22:27: Loaded model with 24 nodes on GPU 0. -05/03/2016 14:26:01: Training criterion node(s): -05/03/2016 14:26:01: ce = CrossEntropyWithSoftmax +05/03/2016 14:22:27: Training criterion node(s): +05/03/2016 14:22:27: ce = CrossEntropyWithSoftmax -05/03/2016 14:26:01: Evaluation criterion node(s): +05/03/2016 14:22:27: Evaluation criterion node(s): -05/03/2016 14:26:01: err = ErrorPrediction +05/03/2016 14:22:27: err = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -703,122 +700,125 @@ Allocating matrices for forward and/or backward propagation. 
Memory Sharing Structure: 0000000000000000: {[err Gradient[1]] [featNorm Gradient[363 x *3]] [features Gradient[363 x *3]] [globalInvStd Gradient[363 x 1]] [globalMean Gradient[363 x 1]] [globalPrior Gradient[132 x 1]] [labels Gradient[132 x *3]] [logPrior Gradient[132 x 1]] [scaledLogLikelihood Gradient[132 x 1 x *3]] } -000000220FCC70E0: {[HL1.t Gradient[512 x *3]] [HL1.y Value[512 x 1 x *3]] } -000000220FCC7720: {[scaledLogLikelihood Value[132 x 1 x *3]] } -000000220FCC7D60: {[logPrior Value[132 x 1]] } -000000220FCC7EA0: {[featNorm Value[363 x *3]] } -000000220FCC7F40: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *3]] } -000000220FCC8080: {[HL1.z Gradient[512 x 1 x *3]] [HL2.t Value[512 x 1 x *3]] } -000000220FCC8440: {[HL1.t Value[512 x *3]] } -000000220FCC8620: {[ce Value[1]] } -000000220FCC86C0: {[HL2.W Gradient[512 x 512]] [HL2.z Value[512 x 1 x *3]] } -000000220FCC8760: {[HL2.t Gradient[512 x 1 x *3]] [HL2.y Value[512 x 1 x *3]] } -000000220FCC8800: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *3]] [HL2.z Gradient[512 x 1 x *3]] [OL.t Value[132 x 1 x *3]] } -000000220FCC88A0: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *3]] } -000000220FD0D840: {[OL.b Gradient[132 x 1]] } -000000220FD0DF20: {[OL.t Gradient[132 x 1 x *3]] } -000000220FD0E920: {[HL2.b Gradient[512 x 1]] [HL2.y Gradient[512 x 1 x *3]] [OL.z Gradient[132 x 1 x *3]] } -000000220FD0EBA0: {[ce Gradient[1]] } -00000022282B7850: {[HL2.b Value[512 x 1]] } -00000022282B7B70: {[globalInvStd Value[363 x 1]] } -00000022282B8570: {[features Value[363 x *3]] } -00000022282B8930: {[globalMean Value[363 x 1]] } -00000022282B90B0: {[HL1.W Value[512 x 363]] } -00000022282B9330: {[HL1.b Value[512 x 1]] } -00000022282B93D0: {[globalPrior Value[132 x 1]] } -000000222B4C6E50: {[labels Value[132 x *3]] } -000000222B4C72B0: {[HL2.W Value[512 x 512]] } -000000222B4C7990: {[OL.b Value[132 x 1]] } -000000222B4C7D50: {[OL.W Value[132 x 512]] } -000000222B4C8570: {[err Value[1]] } +000000EB9F9B00F0: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *3]] } +000000EB9F9B0230: {[HL1.z Gradient[512 x 1 x *3]] [HL2.t Value[512 x 1 x *3]] } +000000EB9F9B0870: {[HL2.W Gradient[512 x 512]] [HL2.z Value[512 x 1 x *3]] } +000000EB9F9B09B0: {[HL2.t Gradient[512 x 1 x *3]] [HL2.y Value[512 x 1 x *3]] } +000000EB9F9B0AF0: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *3]] [HL2.z Gradient[512 x 1 x *3]] [OL.t Value[132 x 1 x *3]] } +000000EB9F9B1310: {[HL1.t Value[512 x *3]] } +000000EB9F9B16D0: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *3]] } +000000EB9F9B1810: {[HL1.t Gradient[512 x *3]] [HL1.y Value[512 x 1 x *3]] } +000000EBB77CD290: {[globalMean Value[363 x 1]] } +000000EBB77CD470: {[globalPrior Value[132 x 1]] } +000000EBB77CD8D0: {[HL2.b Value[512 x 1]] } +000000EBB77CE230: {[globalInvStd Value[363 x 1]] } +000000EBB77CE690: {[features Value[363 x *3]] } +000000EBB77CE730: {[HL1.b Value[512 x 1]] } +000000EBB77CEA50: {[HL1.W Value[512 x 363]] } +000000EBB9FFCB40: {[HL2.W Value[512 x 512]] } +000000EBB9FFCD20: {[ce Value[1]] } +000000EBB9FFCDC0: {[logPrior Value[132 x 1]] } +000000EBB9FFCE60: {[featNorm Value[363 x *3]] } +000000EBB9FFCF00: {[err Value[1]] } +000000EBB9FFD4A0: {[OL.b Value[132 x 1]] } +000000EBB9FFDCC0: {[OL.W Value[132 x 512]] } +000000EBB9FFE620: {[labels Value[132 x *3]] } +000000EBB9FFE8A0: {[scaledLogLikelihood Value[132 x 1 x *3]] } +000000EBC0A90560: {[HL2.b Gradient[512 x 1]] [HL2.y Gradient[512 x 1 x *3]] [OL.z Gradient[132 x 1 x *3]] } +000000EBC0A90600: {[OL.b Gradient[132 x 1]] } 
+000000EBC0A909C0: {[OL.t Gradient[132 x 1 x *3]] } +000000EBC0A90D80: {[ce Gradient[1]] } -05/03/2016 14:26:01: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 14:22:27: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 14:26:01: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +05/03/2016 14:22:27: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:26:01: Starting minibatch loop. -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 1- 10, 3.13%]: ce = 4.49739113 * 2560; err = 0.80429688 * 2560; time = 0.1729s; samplesPerSecond = 14807.7 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 2.83226433 * 2560; err = 0.68125000 * 2560; time = 0.0400s; samplesPerSecond = 63984.0 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 2.25921097 * 2560; err = 0.59921875 * 2560; time = 0.0404s; samplesPerSecond = 63400.9 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.91240921 * 2560; err = 0.51210937 * 2560; time = 0.0397s; samplesPerSecond = 64456.0 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 41- 50, 15.63%]: ce = 1.69259949 * 2560; err = 0.46679688 * 2560; time = 0.0399s; samplesPerSecond = 64092.9 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.59069672 * 2560; err = 0.45312500 * 2560; time = 0.0396s; samplesPerSecond = 64578.0 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.48813324 * 2560; err = 0.43789062 * 2560; time = 0.0396s; samplesPerSecond = 64649.7 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.48960571 * 2560; err = 0.43515625 * 2560; time = 0.0398s; samplesPerSecond = 64347.5 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 81- 90, 28.13%]: ce = 1.45628204 * 2560; err = 0.42187500 * 2560; time = 0.0395s; samplesPerSecond = 64870.9 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.41567383 * 2560; err = 0.40820313 * 2560; time = 0.0394s; samplesPerSecond = 65015.9 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.42048950 * 2560; err = 0.41406250 * 2560; time = 0.0406s; samplesPerSecond = 62995.2 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.34279480 * 2560; err = 0.39726563 * 2560; time = 0.0407s; samplesPerSecond = 62859.1 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 121- 130, 40.63%]: ce = 1.31633148 * 2560; err = 0.38789062 * 2560; time = 0.0408s; samplesPerSecond = 62786.6 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.33296814 * 2560; err = 0.39804688 * 2560; time = 0.0408s; samplesPerSecond = 62819.0 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.32084351 * 2560; err = 0.39609375 * 2560; time = 0.0402s; samplesPerSecond = 63695.9 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.27189636 * 2560; err = 0.38125000 * 2560; time = 0.0408s; samplesPerSecond = 62674.4 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 161- 170, 53.13%]: ce = 1.29380188 * 2560; err = 0.38554688 * 2560; time = 0.0408s; samplesPerSecond = 62711.3 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.31463013 * 
2560; err = 0.38984375 * 2560; time = 0.0402s; samplesPerSecond = 63605.6 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.33578796 * 2560; err = 0.40664062 * 2560; time = 0.0409s; samplesPerSecond = 62642.2 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.32202454 * 2560; err = 0.41484375 * 2560; time = 0.0408s; samplesPerSecond = 62805.1 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 201- 210, 65.63%]: ce = 1.23669434 * 2560; err = 0.37460938 * 2560; time = 0.0403s; samplesPerSecond = 63561.4 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.27109985 * 2560; err = 0.38906250 * 2560; time = 0.0408s; samplesPerSecond = 62742.0 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 221- 230, 71.88%]: ce = 1.26419678 * 2560; err = 0.37578125 * 2560; time = 0.0396s; samplesPerSecond = 64595.9 -05/03/2016 14:26:02: Epoch[ 1 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.23778992 * 2560; err = 0.37265625 * 2560; time = 0.0397s; samplesPerSecond = 64538.9 -05/03/2016 14:26:03: Epoch[ 1 of 2]-Minibatch[ 241- 250, 78.13%]: ce = 1.21040344 * 2560; err = 0.36757812 * 2560; time = 0.0406s; samplesPerSecond = 62999.9 -05/03/2016 14:26:03: Epoch[ 1 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.18387146 * 2560; err = 0.36562500 * 2560; time = 0.0407s; samplesPerSecond = 62896.2 -05/03/2016 14:26:03: Epoch[ 1 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.23827515 * 2560; err = 0.37148437 * 2560; time = 0.0401s; samplesPerSecond = 63776.8 -05/03/2016 14:26:03: Epoch[ 1 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.18418274 * 2560; err = 0.36328125 * 2560; time = 0.0406s; samplesPerSecond = 63110.1 -05/03/2016 14:26:03: Epoch[ 1 of 2]-Minibatch[ 281- 290, 90.63%]: ce = 1.16851501 * 2560; err = 0.35234375 * 2560; time = 0.0406s; samplesPerSecond = 63127.3 -05/03/2016 14:26:03: Epoch[ 1 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.14337463 * 2560; err = 0.34375000 * 2560; time = 0.0395s; samplesPerSecond = 64877.5 -05/03/2016 14:26:03: Epoch[ 1 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.17227478 * 2560; err = 0.34882812 * 2560; time = 0.0395s; samplesPerSecond = 64841.3 -05/03/2016 14:26:03: Epoch[ 1 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.18431091 * 2560; err = 0.36835937 * 2560; time = 0.0397s; samplesPerSecond = 64543.8 -05/03/2016 14:26:03: Finished Epoch[ 1 of 2]: [Training] ce = 1.51252575 * 81920; err = 0.42452393 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=1.48053s -05/03/2016 14:26:03: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.1' +05/03/2016 14:22:27: Starting minibatch loop. 
+05/03/2016 14:22:27: Epoch[ 1 of 2]-Minibatch[ 1- 10, 3.13%]: ce = 4.49739113 * 2560; err = 0.80429688 * 2560; time = 0.0477s; samplesPerSecond = 53647.4 +05/03/2016 14:22:27: Epoch[ 1 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 2.83226433 * 2560; err = 0.68125000 * 2560; time = 0.0396s; samplesPerSecond = 64568.2 +05/03/2016 14:22:27: Epoch[ 1 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 2.25921097 * 2560; err = 0.59921875 * 2560; time = 0.0399s; samplesPerSecond = 64092.9 +05/03/2016 14:22:27: Epoch[ 1 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.91240921 * 2560; err = 0.51210937 * 2560; time = 0.0405s; samplesPerSecond = 63267.7 +05/03/2016 14:22:27: Epoch[ 1 of 2]-Minibatch[ 41- 50, 15.63%]: ce = 1.69259949 * 2560; err = 0.46679688 * 2560; time = 0.0401s; samplesPerSecond = 63772.0 +05/03/2016 14:22:27: Epoch[ 1 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.59069672 * 2560; err = 0.45312500 * 2560; time = 0.0402s; samplesPerSecond = 63727.6 +05/03/2016 14:22:27: Epoch[ 1 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.48813324 * 2560; err = 0.43789062 * 2560; time = 0.0402s; samplesPerSecond = 63608.8 +05/03/2016 14:22:27: Epoch[ 1 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.48960571 * 2560; err = 0.43515625 * 2560; time = 0.0402s; samplesPerSecond = 63613.5 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 81- 90, 28.13%]: ce = 1.45628204 * 2560; err = 0.42187500 * 2560; time = 0.0402s; samplesPerSecond = 63759.3 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.41567383 * 2560; err = 0.40820313 * 2560; time = 0.0399s; samplesPerSecond = 64213.5 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.42048950 * 2560; err = 0.41406250 * 2560; time = 0.0402s; samplesPerSecond = 63616.7 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.34279480 * 2560; err = 0.39726563 * 2560; time = 0.0399s; samplesPerSecond = 64128.3 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 121- 130, 40.63%]: ce = 1.31633148 * 2560; err = 0.38789062 * 2560; time = 0.0396s; samplesPerSecond = 64576.3 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.33296814 * 2560; err = 0.39804688 * 2560; time = 0.0401s; samplesPerSecond = 63764.1 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.32084351 * 2560; err = 0.39609375 * 2560; time = 0.0402s; samplesPerSecond = 63687.9 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.27189636 * 2560; err = 0.38125000 * 2560; time = 0.0403s; samplesPerSecond = 63585.1 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 161- 170, 53.13%]: ce = 1.29380188 * 2560; err = 0.38554688 * 2560; time = 0.0402s; samplesPerSecond = 63748.2 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.31463013 * 2560; err = 0.38984375 * 2560; time = 0.0402s; samplesPerSecond = 63753.0 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.33578796 * 2560; err = 0.40664062 * 2560; time = 0.0402s; samplesPerSecond = 63730.7 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.32202454 * 2560; err = 0.41484375 * 2560; time = 0.0399s; samplesPerSecond = 64215.1 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 201- 210, 65.63%]: ce = 1.23669434 * 2560; err = 0.37460938 * 2560; time = 0.0403s; samplesPerSecond = 63602.5 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.27109985 * 2560; err = 0.38906250 * 2560; time = 0.0402s; samplesPerSecond = 63695.9 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 221- 230, 71.88%]: ce = 
1.26419678 * 2560; err = 0.37578125 * 2560; time = 0.0403s; samplesPerSecond = 63466.9 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.23778992 * 2560; err = 0.37265625 * 2560; time = 0.0402s; samplesPerSecond = 63687.9 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 241- 250, 78.13%]: ce = 1.21040344 * 2560; err = 0.36757812 * 2560; time = 0.0402s; samplesPerSecond = 63732.3 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.18387146 * 2560; err = 0.36562500 * 2560; time = 0.0402s; samplesPerSecond = 63675.3 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.23827515 * 2560; err = 0.37148437 * 2560; time = 0.0401s; samplesPerSecond = 63792.7 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.18418274 * 2560; err = 0.36328125 * 2560; time = 0.0402s; samplesPerSecond = 63640.4 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 281- 290, 90.63%]: ce = 1.16851501 * 2560; err = 0.35234375 * 2560; time = 0.0402s; samplesPerSecond = 63699.0 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.14337463 * 2560; err = 0.34375000 * 2560; time = 0.0403s; samplesPerSecond = 63600.9 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.17227478 * 2560; err = 0.34882812 * 2560; time = 0.0402s; samplesPerSecond = 63649.9 +05/03/2016 14:22:28: Epoch[ 1 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.18431091 * 2560; err = 0.36835937 * 2560; time = 0.0403s; samplesPerSecond = 63504.7 +05/03/2016 14:22:28: Finished Epoch[ 1 of 2]: [Training] ce = 1.51252575 * 81920; err = 0.42452393 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=1.44851s +05/03/2016 14:22:28: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech.1' -05/03/2016 14:26:03: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +05/03/2016 14:22:28: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.900000 momentum as time constant = 2429.8 samples +minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses -05/03/2016 14:26:03: Starting minibatch loop. 
-05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 1- 10, 3.13%]: ce = 1.17448177 * 2560; err = 0.35195312 * 2560; time = 0.0403s; samplesPerSecond = 63593.0 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 1.14536762 * 2560; err = 0.35664062 * 2560; time = 0.0393s; samplesPerSecond = 65060.5 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 1.15722923 * 2560; err = 0.34531250 * 2560; time = 0.0392s; samplesPerSecond = 65296.1 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.14344521 * 2560; err = 0.34804687 * 2560; time = 0.0395s; samplesPerSecond = 64841.3 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 41- 50, 15.63%]: ce = 1.14842377 * 2560; err = 0.36562500 * 2560; time = 0.0394s; samplesPerSecond = 65022.5 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.14489059 * 2560; err = 0.34218750 * 2560; time = 0.0394s; samplesPerSecond = 65002.7 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.09631271 * 2560; err = 0.33984375 * 2560; time = 0.0393s; samplesPerSecond = 65221.3 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.16026917 * 2560; err = 0.35546875 * 2560; time = 0.0393s; samplesPerSecond = 65214.6 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 81- 90, 28.13%]: ce = 1.16528015 * 2560; err = 0.36015625 * 2560; time = 0.0393s; samplesPerSecond = 65216.3 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.12257309 * 2560; err = 0.34492187 * 2560; time = 0.0393s; samplesPerSecond = 65188.1 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.12313080 * 2560; err = 0.34765625 * 2560; time = 0.0392s; samplesPerSecond = 65261.2 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.18492126 * 2560; err = 0.36171875 * 2560; time = 0.0393s; samplesPerSecond = 65143.3 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 121- 130, 40.63%]: ce = 1.13058014 * 2560; err = 0.33476563 * 2560; time = 0.0393s; samplesPerSecond = 65087.0 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.16725922 * 2560; err = 0.35781250 * 2560; time = 0.0394s; samplesPerSecond = 65007.6 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.12244720 * 2560; err = 0.34648438 * 2560; time = 0.0405s; samplesPerSecond = 63220.8 -05/03/2016 14:26:03: Epoch[ 2 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.09480591 * 2560; err = 0.33671875 * 2560; time = 0.0404s; samplesPerSecond = 63338.1 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 161- 170, 53.13%]: ce = 1.11218109 * 2560; err = 0.34140625 * 2560; time = 0.0407s; samplesPerSecond = 62917.8 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.11966095 * 2560; err = 0.33398438 * 2560; time = 0.0405s; samplesPerSecond = 63252.0 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.10485687 * 2560; err = 0.33671875 * 2560; time = 0.0405s; samplesPerSecond = 63250.5 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.06019897 * 2560; err = 0.32617188 * 2560; time = 0.0403s; samplesPerSecond = 63452.7 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 201- 210, 65.63%]: ce = 1.10600891 * 2560; err = 0.34101562 * 2560; time = 0.0407s; samplesPerSecond = 62924.0 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.13724823 * 2560; err = 0.34101562 * 2560; time = 0.0411s; samplesPerSecond = 62355.4 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 221- 230, 71.88%]: ce = 
1.12464600 * 2560; err = 0.34609375 * 2560; time = 0.0401s; samplesPerSecond = 63881.8 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.10831604 * 2560; err = 0.33593750 * 2560; time = 0.0407s; samplesPerSecond = 62973.5 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 241- 250, 78.13%]: ce = 1.09707031 * 2560; err = 0.34023437 * 2560; time = 0.0394s; samplesPerSecond = 64984.5 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.04812317 * 2560; err = 0.32773438 * 2560; time = 0.0393s; samplesPerSecond = 65178.1 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.04979248 * 2560; err = 0.33398438 * 2560; time = 0.0393s; samplesPerSecond = 65118.4 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.03223572 * 2560; err = 0.31835938 * 2560; time = 0.0393s; samplesPerSecond = 65077.0 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 281- 290, 90.63%]: ce = 1.05677490 * 2560; err = 0.32773438 * 2560; time = 0.0392s; samplesPerSecond = 65271.2 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.10880737 * 2560; err = 0.34296875 * 2560; time = 0.0393s; samplesPerSecond = 65128.3 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.08513489 * 2560; err = 0.33476563 * 2560; time = 0.0393s; samplesPerSecond = 65186.4 -05/03/2016 14:26:04: Epoch[ 2 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.04244080 * 2560; err = 0.31757812 * 2560; time = 0.0397s; samplesPerSecond = 64473.9 -05/03/2016 14:26:04: Finished Epoch[ 2 of 2]: [Training] ce = 1.11484108 * 81920; err = 0.34190674 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=1.28012s -05/03/2016 14:26:04: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech' -05/03/2016 14:26:04: CNTKCommandTrainEnd: dptPre2 +05/03/2016 14:22:28: Starting minibatch loop. 
+05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 1- 10, 3.13%]: ce = 1.17448177 * 2560; err = 0.35195312 * 2560; time = 0.0401s; samplesPerSecond = 63802.2 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 11- 20, 6.25%]: ce = 1.14536762 * 2560; err = 0.35664062 * 2560; time = 0.0392s; samplesPerSecond = 65267.8 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 21- 30, 9.38%]: ce = 1.15722923 * 2560; err = 0.34531250 * 2560; time = 0.0399s; samplesPerSecond = 64237.7 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 31- 40, 12.50%]: ce = 1.14344521 * 2560; err = 0.34804687 * 2560; time = 0.0397s; samplesPerSecond = 64561.7 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 41- 50, 15.63%]: ce = 1.14842377 * 2560; err = 0.36562500 * 2560; time = 0.0395s; samplesPerSecond = 64741.3 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 51- 60, 18.75%]: ce = 1.14489059 * 2560; err = 0.34218750 * 2560; time = 0.0397s; samplesPerSecond = 64496.6 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 61- 70, 21.88%]: ce = 1.09631271 * 2560; err = 0.33984375 * 2560; time = 0.0387s; samplesPerSecond = 66196.1 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 71- 80, 25.00%]: ce = 1.16026917 * 2560; err = 0.35546875 * 2560; time = 0.0396s; samplesPerSecond = 64692.2 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 81- 90, 28.13%]: ce = 1.16528015 * 2560; err = 0.36015625 * 2560; time = 0.0402s; samplesPerSecond = 63651.5 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 91- 100, 31.25%]: ce = 1.12257309 * 2560; err = 0.34492187 * 2560; time = 0.0402s; samplesPerSecond = 63735.5 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 101- 110, 34.38%]: ce = 1.12313080 * 2560; err = 0.34765625 * 2560; time = 0.0394s; samplesPerSecond = 64984.5 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 111- 120, 37.50%]: ce = 1.18492126 * 2560; err = 0.36171875 * 2560; time = 0.0391s; samplesPerSecond = 65463.1 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 121- 130, 40.63%]: ce = 1.13058014 * 2560; err = 0.33476563 * 2560; time = 0.0401s; samplesPerSecond = 63878.6 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 131- 140, 43.75%]: ce = 1.16725922 * 2560; err = 0.35781250 * 2560; time = 0.0401s; samplesPerSecond = 63802.2 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 141- 150, 46.88%]: ce = 1.12244720 * 2560; err = 0.34648438 * 2560; time = 0.0402s; samplesPerSecond = 63686.3 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 151- 160, 50.00%]: ce = 1.09480591 * 2560; err = 0.33671875 * 2560; time = 0.0402s; samplesPerSecond = 63749.8 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 161- 170, 53.13%]: ce = 1.11218109 * 2560; err = 0.34140625 * 2560; time = 0.0401s; samplesPerSecond = 63830.8 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 171- 180, 56.25%]: ce = 1.11966095 * 2560; err = 0.33398438 * 2560; time = 0.0401s; samplesPerSecond = 63822.9 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 181- 190, 59.38%]: ce = 1.10485687 * 2560; err = 0.33671875 * 2560; time = 0.0402s; samplesPerSecond = 63703.8 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 191- 200, 62.50%]: ce = 1.06019897 * 2560; err = 0.32617188 * 2560; time = 0.0403s; samplesPerSecond = 63566.2 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 201- 210, 65.63%]: ce = 1.10600891 * 2560; err = 0.34101562 * 2560; time = 0.0402s; samplesPerSecond = 63714.9 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 211- 220, 68.75%]: ce = 1.13724823 * 2560; err = 0.34101562 * 2560; time = 0.0401s; samplesPerSecond = 63783.1 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 221- 230, 71.88%]: ce = 
1.12464600 * 2560; err = 0.34609375 * 2560; time = 0.0401s; samplesPerSecond = 63902.5 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 231- 240, 75.00%]: ce = 1.10831604 * 2560; err = 0.33593750 * 2560; time = 0.0403s; samplesPerSecond = 63518.8 +05/03/2016 14:22:29: Epoch[ 2 of 2]-Minibatch[ 241- 250, 78.13%]: ce = 1.09707031 * 2560; err = 0.34023437 * 2560; time = 0.0400s; samplesPerSecond = 63993.6 +05/03/2016 14:22:30: Epoch[ 2 of 2]-Minibatch[ 251- 260, 81.25%]: ce = 1.04812317 * 2560; err = 0.32773438 * 2560; time = 0.0401s; samplesPerSecond = 63832.4 +05/03/2016 14:22:30: Epoch[ 2 of 2]-Minibatch[ 261- 270, 84.38%]: ce = 1.04979248 * 2560; err = 0.33398438 * 2560; time = 0.0401s; samplesPerSecond = 63760.9 +05/03/2016 14:22:30: Epoch[ 2 of 2]-Minibatch[ 271- 280, 87.50%]: ce = 1.03223572 * 2560; err = 0.31835938 * 2560; time = 0.0400s; samplesPerSecond = 64004.8 +05/03/2016 14:22:30: Epoch[ 2 of 2]-Minibatch[ 281- 290, 90.63%]: ce = 1.05677490 * 2560; err = 0.32773438 * 2560; time = 0.0404s; samplesPerSecond = 63433.9 +05/03/2016 14:22:30: Epoch[ 2 of 2]-Minibatch[ 291- 300, 93.75%]: ce = 1.10880737 * 2560; err = 0.34296875 * 2560; time = 0.0402s; samplesPerSecond = 63624.6 +05/03/2016 14:22:30: Epoch[ 2 of 2]-Minibatch[ 301- 310, 96.88%]: ce = 1.08513489 * 2560; err = 0.33476563 * 2560; time = 0.0401s; samplesPerSecond = 63792.7 +05/03/2016 14:22:30: Epoch[ 2 of 2]-Minibatch[ 311- 320, 100.00%]: ce = 1.04244080 * 2560; err = 0.31757812 * 2560; time = 0.0401s; samplesPerSecond = 63829.3 +05/03/2016 14:22:30: Finished Epoch[ 2 of 2]: [Training] ce = 1.11484108 * 81920; err = 0.34190674 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=1.28118s +05/03/2016 14:22:30: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/Pre2/cntkSpeech' +05/03/2016 14:22:30: CNTKCommandTrainEnd: dptPre2 -05/03/2016 14:26:04: Action "train" complete. +05/03/2016 14:22:30: Action "train" complete. -05/03/2016 14:26:04: ############################################################################## -05/03/2016 14:26:04: # # -05/03/2016 14:26:04: # Action "edit" # -05/03/2016 14:26:04: # # -05/03/2016 14:26:04: ############################################################################## +05/03/2016 14:22:30: ############################################################################## +05/03/2016 14:22:30: # # +05/03/2016 14:22:30: # Action "edit" # +05/03/2016 14:22:30: # # +05/03/2016 14:22:30: ############################################################################## Post-processing network... @@ -918,26 +918,25 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:26:04: Action "edit" complete. +05/03/2016 14:22:30: Action "edit" complete. 
-05/03/2016 14:26:04: ############################################################################## -05/03/2016 14:26:04: # # -05/03/2016 14:26:04: # Action "train" # -05/03/2016 14:26:04: # # -05/03/2016 14:26:04: ############################################################################## +05/03/2016 14:22:30: ############################################################################## +05/03/2016 14:22:30: # # +05/03/2016 14:22:30: # Action "train" # +05/03/2016 14:22:30: # # +05/03/2016 14:22:30: ############################################################################## -05/03/2016 14:26:04: CNTKCommandTrainBegin: speechTrain +05/03/2016 14:22:30: CNTKCommandTrainBegin: speechTrain NDLBuilder Using GPU 0 -Reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.scp ... 948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:26:05: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0'. +05/03/2016 14:22:30: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.0'. Post-processing network... @@ -989,14 +988,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:26:05: Loaded model with 29 nodes on GPU 0. +05/03/2016 14:22:30: Loaded model with 29 nodes on GPU 0. -05/03/2016 14:26:05: Training criterion node(s): -05/03/2016 14:26:05: ce = CrossEntropyWithSoftmax +05/03/2016 14:22:30: Training criterion node(s): +05/03/2016 14:22:30: ce = CrossEntropyWithSoftmax -05/03/2016 14:26:05: Evaluation criterion node(s): +05/03/2016 14:22:30: Evaluation criterion node(s): -05/03/2016 14:26:05: err = ErrorPrediction +05/03/2016 14:22:30: err = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -1004,147 +1003,152 @@ Allocating matrices for forward and/or backward propagation. 
Memory Sharing Structure: 0000000000000000: {[err Gradient[1]] [featNorm Gradient[363 x *6]] [features Gradient[363 x *6]] [globalInvStd Gradient[363 x 1]] [globalMean Gradient[363 x 1]] [globalPrior Gradient[132 x 1]] [labels Gradient[132 x *6]] [logPrior Gradient[132 x 1]] [scaledLogLikelihood Gradient[132 x 1 x *6]] } -000000220FC63220: {[HL3.W Value[512 x 512]] } -000000220FCC7EA0: {[HL2.W Value[512 x 512]] } -000000220FCC88A0: {[HL3.b Value[512 x 1]] } -00000022282B77B0: {[globalMean Value[363 x 1]] } -00000022282B7850: {[globalPrior Value[132 x 1]] } -00000022282B82F0: {[HL1.b Value[512 x 1]] } -00000022282B9290: {[features Value[363 x *6]] } -00000022282B93D0: {[globalInvStd Value[363 x 1]] } -000000222B4C7FD0: {[HL1.W Value[512 x 363]] } -000000222B4C8390: {[HL2.b Value[512 x 1]] } -0000002239037B70: {[HL3.b Gradient[512 x 1]] [HL3.y Gradient[512 x 1 x *6]] [OL.z Gradient[132 x 1 x *6]] } -0000002239037C10: {[OL.t Gradient[132 x 1 x *6]] } -0000002239037D50: {[HL1.t Gradient[512 x *6]] [HL1.y Value[512 x 1 x *6]] } -0000002239037DF0: {[HL2.b Gradient[512 x 1]] [HL2.y Gradient[512 x 1 x *6]] [HL3.z Gradient[512 x 1 x *6]] [OL.t Value[132 x 1 x *6]] } -0000002239037E90: {[HL1.t Value[512 x *6]] } -0000002239037FD0: {[HL2.W Gradient[512 x 512]] [HL2.z Value[512 x 1 x *6]] } -0000002239038110: {[ce Gradient[1]] } -0000002239038250: {[labels Value[132 x *6]] } -0000002239038390: {[logPrior Value[132 x 1]] } -0000002239038430: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *6]] } -0000002239038570: {[ce Value[1]] } -0000002239038610: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *6]] [HL2.z Gradient[512 x 1 x *6]] [HL3.t Value[512 x 1 x *6]] } -0000002239038750: {[HL2.t Gradient[512 x 1 x *6]] [HL2.y Value[512 x 1 x *6]] } -0000002239038B10: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *6]] } -0000002239038BB0: {[OL.b Value[132 x 1]] } -0000002239038C50: {[HL3.t Gradient[512 x 1 x *6]] [HL3.y Value[512 x 1 x *6]] } -0000002239038F70: {[OL.W Value[132 x 512]] } -0000002239039010: {[OL.b Gradient[132 x 1]] } -00000022390390B0: {[featNorm Value[363 x *6]] } -0000002239039470: {[err Value[1]] } -0000002239039510: {[scaledLogLikelihood Value[132 x 1 x *6]] } -0000002239039650: {[HL3.W Gradient[512 x 512]] [HL3.z Value[512 x 1 x *6]] } -00000022390396F0: {[HL1.z Gradient[512 x 1 x *6]] [HL2.t Value[512 x 1 x *6]] } +000000EB9F9B0870: {[HL3.b Value[512 x 1]] } +000000EB9F9B0B90: {[HL3.W Value[512 x 512]] } +000000EBA166B790: {[featNorm Value[363 x *6]] } +000000EBA166B8D0: {[HL2.t Gradient[512 x 1 x *6]] [HL2.y Value[512 x 1 x *6]] } +000000EBA166B970: {[ce Gradient[1]] } +000000EBA166BA10: {[OL.W Gradient[132 x 512]] [OL.z Value[132 x 1 x *6]] } +000000EBA166BAB0: {[ce Value[1]] } +000000EBA166BB50: {[HL3.b Gradient[512 x 1]] [HL3.y Gradient[512 x 1 x *6]] [OL.z Gradient[132 x 1 x *6]] } +000000EBA166BD30: {[HL1.t Value[512 x *6]] } +000000EBA166BDD0: {[err Value[1]] } +000000EBA166BE70: {[OL.t Gradient[132 x 1 x *6]] } +000000EBA166BF10: {[labels Value[132 x *6]] } +000000EBA166C050: {[HL1.b Gradient[512 x 1]] [HL1.y Gradient[512 x 1 x *6]] [HL2.z Gradient[512 x 1 x *6]] [HL3.t Value[512 x 1 x *6]] } +000000EBA166C0F0: {[HL3.W Gradient[512 x 512]] [HL3.z Value[512 x 1 x *6]] } +000000EBA166C190: {[HL1.W Gradient[512 x 363]] [HL1.z Value[512 x 1 x *6]] } +000000EBA166C2D0: {[OL.b Gradient[132 x 1]] } +000000EBA166C550: {[OL.W Value[132 x 512]] } +000000EBA166C7D0: {[HL2.b Gradient[512 x 1]] [HL2.y Gradient[512 x 1 x *6]] [HL3.z Gradient[512 x 1 x *6]] [OL.t Value[132 x 1 x 
*6]] } +000000EBA166CB90: {[HL1.t Gradient[512 x *6]] [HL1.y Value[512 x 1 x *6]] } +000000EBA166CC30: {[HL2.W Gradient[512 x 512]] [HL2.z Value[512 x 1 x *6]] } +000000EBA166CCD0: {[HL1.z Gradient[512 x 1 x *6]] [HL2.t Value[512 x 1 x *6]] } +000000EBA166CD70: {[scaledLogLikelihood Value[132 x 1 x *6]] } +000000EBA166CF50: {[logPrior Value[132 x 1]] } +000000EBA166D1D0: {[HL3.t Gradient[512 x 1 x *6]] [HL3.y Value[512 x 1 x *6]] } +000000EBA166D3B0: {[OL.b Value[132 x 1]] } +000000EBB77CD790: {[globalPrior Value[132 x 1]] } +000000EBB77CD970: {[globalInvStd Value[363 x 1]] } +000000EBB77CE730: {[features Value[363 x *6]] } +000000EBB77CE870: {[globalMean Value[363 x 1]] } +000000EBB77CEB90: {[HL1.b Value[512 x 1]] } +000000EBB9FFCDC0: {[HL2.W Value[512 x 512]] } +000000EBB9FFDCC0: {[HL1.W Value[512 x 363]] } +000000EBB9FFDEA0: {[HL2.b Value[512 x 1]] } -05/03/2016 14:26:05: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 14:22:30: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 14:26:05: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900117 momentum as time constant = 2432.7 samples +05/03/2016 14:22:30: Starting Epoch 1: learning rate per sample = 0.003125 effective momentum = 0.900117 momentum as time constant = 2432.7 samples +minibatchiterator: epoch 0: frames [0..81920] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:26:05: Starting minibatch loop. -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: ce = 4.12455330 * 2560; err = 0.82734375 * 2560; time = 0.1873s; samplesPerSecond = 13664.3 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: ce = 2.55599785 * 2560; err = 0.63007813 * 2560; time = 0.0529s; samplesPerSecond = 48373.1 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: ce = 2.03516159 * 2560; err = 0.53945312 * 2560; time = 0.0530s; samplesPerSecond = 48330.2 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: ce = 1.73739853 * 2560; err = 0.47500000 * 2560; time = 0.0530s; samplesPerSecond = 48340.2 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: ce = 1.54207916 * 2560; err = 0.43515625 * 2560; time = 0.0530s; samplesPerSecond = 48332.9 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: ce = 1.44409790 * 2560; err = 0.41328125 * 2560; time = 0.0533s; samplesPerSecond = 48056.2 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: ce = 1.36059418 * 2560; err = 0.40898438 * 2560; time = 0.0528s; samplesPerSecond = 48492.2 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: ce = 1.35930023 * 2560; err = 0.40117188 * 2560; time = 0.0522s; samplesPerSecond = 49018.7 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: ce = 1.34254303 * 2560; err = 0.38632813 * 2560; time = 0.0531s; samplesPerSecond = 48206.4 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: ce = 1.30505676 * 2560; err = 0.38320312 * 2560; time = 0.0530s; samplesPerSecond = 48342.0 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: ce = 1.30881348 * 2560; err = 0.38476563 * 2560; time = 0.0528s; samplesPerSecond = 48459.2 -05/03/2016 14:26:05: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: ce = 1.23755188 * 2560; err = 0.37304688 * 2560; time = 0.0530s; samplesPerSecond = 48281.8 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: ce = 1.21070251 * 2560; err = 
0.35546875 * 2560; time = 0.0530s; samplesPerSecond = 48303.7 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: ce = 1.24008789 * 2560; err = 0.37109375 * 2560; time = 0.0528s; samplesPerSecond = 48464.7 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: ce = 1.23422089 * 2560; err = 0.36835937 * 2560; time = 0.0528s; samplesPerSecond = 48452.7 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: ce = 1.19425964 * 2560; err = 0.35195312 * 2560; time = 0.0529s; samplesPerSecond = 48390.5 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: ce = 1.21415710 * 2560; err = 0.36289063 * 2560; time = 0.0531s; samplesPerSecond = 48182.8 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: ce = 1.24289856 * 2560; err = 0.37031250 * 2560; time = 0.0530s; samplesPerSecond = 48341.1 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: ce = 1.26465759 * 2560; err = 0.38359375 * 2560; time = 0.0529s; samplesPerSecond = 48366.7 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: ce = 1.22050476 * 2560; err = 0.38085938 * 2560; time = 0.0529s; samplesPerSecond = 48349.3 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: ce = 1.17745056 * 2560; err = 0.35507813 * 2560; time = 0.0528s; samplesPerSecond = 48467.4 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: ce = 1.19851379 * 2560; err = 0.37109375 * 2560; time = 0.0518s; samplesPerSecond = 49456.2 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: ce = 1.21453857 * 2560; err = 0.35820313 * 2560; time = 0.0532s; samplesPerSecond = 48145.6 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: ce = 1.18011475 * 2560; err = 0.35546875 * 2560; time = 0.0529s; samplesPerSecond = 48369.4 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: ce = 1.16693726 * 2560; err = 0.35195312 * 2560; time = 0.0530s; samplesPerSecond = 48291.9 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: ce = 1.12398987 * 2560; err = 0.35234375 * 2560; time = 0.0530s; samplesPerSecond = 48260.9 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: ce = 1.18822021 * 2560; err = 0.36328125 * 2560; time = 0.0529s; samplesPerSecond = 48372.2 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: ce = 1.13831482 * 2560; err = 0.35078125 * 2560; time = 0.0530s; samplesPerSecond = 48304.6 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: ce = 1.12718811 * 2560; err = 0.33984375 * 2560; time = 0.0529s; samplesPerSecond = 48375.8 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: ce = 1.11155396 * 2560; err = 0.34179688 * 2560; time = 0.0530s; samplesPerSecond = 48306.4 -05/03/2016 14:26:06: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: ce = 1.13423157 * 2560; err = 0.34101562 * 2560; time = 0.0529s; samplesPerSecond = 48354.8 -05/03/2016 14:26:07: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: ce = 1.12716675 * 2560; err = 0.34414062 * 2560; time = 0.0529s; samplesPerSecond = 48386.8 -05/03/2016 14:26:07: Finished Epoch[ 1 of 4]: [Training] ce = 1.40821428 * 81920; err = 0.40085449 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=1.89039s -05/03/2016 14:26:07: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.1' +05/03/2016 14:22:30: Starting minibatch loop. 
+05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: ce = 4.12455330 * 2560; err = 0.82734375 * 2560; time = 0.0630s; samplesPerSecond = 40603.3 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: ce = 2.55599785 * 2560; err = 0.63007813 * 2560; time = 0.0528s; samplesPerSecond = 48503.2 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: ce = 2.03516159 * 2560; err = 0.53945312 * 2560; time = 0.0529s; samplesPerSecond = 48354.8 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: ce = 1.73739853 * 2560; err = 0.47500000 * 2560; time = 0.0529s; samplesPerSecond = 48364.9 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: ce = 1.54207916 * 2560; err = 0.43515625 * 2560; time = 0.0530s; samplesPerSecond = 48339.3 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: ce = 1.44409790 * 2560; err = 0.41328125 * 2560; time = 0.0529s; samplesPerSecond = 48362.1 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: ce = 1.36059418 * 2560; err = 0.40898438 * 2560; time = 0.0529s; samplesPerSecond = 48372.2 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: ce = 1.35930023 * 2560; err = 0.40117188 * 2560; time = 0.0528s; samplesPerSecond = 48468.3 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: ce = 1.34254303 * 2560; err = 0.38632813 * 2560; time = 0.0523s; samplesPerSecond = 48920.3 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: ce = 1.30505676 * 2560; err = 0.38320312 * 2560; time = 0.0529s; samplesPerSecond = 48359.4 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: ce = 1.30881348 * 2560; err = 0.38476563 * 2560; time = 0.0529s; samplesPerSecond = 48437.1 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: ce = 1.23755188 * 2560; err = 0.37304688 * 2560; time = 0.0528s; samplesPerSecond = 48444.5 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: ce = 1.21070251 * 2560; err = 0.35546875 * 2560; time = 0.0530s; samplesPerSecond = 48291.9 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: ce = 1.24008789 * 2560; err = 0.37109375 * 2560; time = 0.0529s; samplesPerSecond = 48424.3 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: ce = 1.23422089 * 2560; err = 0.36835937 * 2560; time = 0.0528s; samplesPerSecond = 48503.2 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: ce = 1.19425964 * 2560; err = 0.35195312 * 2560; time = 0.0530s; samplesPerSecond = 48285.5 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: ce = 1.21415710 * 2560; err = 0.36289063 * 2560; time = 0.0529s; samplesPerSecond = 48400.5 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: ce = 1.24289856 * 2560; err = 0.37031250 * 2560; time = 0.0530s; samplesPerSecond = 48344.8 +05/03/2016 14:22:31: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: ce = 1.26465759 * 2560; err = 0.38359375 * 2560; time = 0.0529s; samplesPerSecond = 48390.5 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: ce = 1.22050476 * 2560; err = 0.38085938 * 2560; time = 0.0529s; samplesPerSecond = 48379.5 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: ce = 1.17745056 * 2560; err = 0.35507813 * 2560; time = 0.0528s; samplesPerSecond = 48461.0 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: ce = 1.19851379 * 2560; err = 0.37109375 * 2560; time = 0.0529s; samplesPerSecond = 48374.9 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: ce = 
1.21453857 * 2560; err = 0.35820313 * 2560; time = 0.0528s; samplesPerSecond = 48514.3 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: ce = 1.18011475 * 2560; err = 0.35546875 * 2560; time = 0.0528s; samplesPerSecond = 48460.1 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: ce = 1.16693726 * 2560; err = 0.35195312 * 2560; time = 0.0529s; samplesPerSecond = 48396.9 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: ce = 1.12398987 * 2560; err = 0.35234375 * 2560; time = 0.0528s; samplesPerSecond = 48509.7 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: ce = 1.18822021 * 2560; err = 0.36328125 * 2560; time = 0.0528s; samplesPerSecond = 48521.6 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: ce = 1.13831482 * 2560; err = 0.35078125 * 2560; time = 0.0520s; samplesPerSecond = 49255.4 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: ce = 1.12718811 * 2560; err = 0.33984375 * 2560; time = 0.0530s; samplesPerSecond = 48345.7 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: ce = 1.11155396 * 2560; err = 0.34179688 * 2560; time = 0.0529s; samplesPerSecond = 48369.4 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: ce = 1.13423157 * 2560; err = 0.34101562 * 2560; time = 0.0530s; samplesPerSecond = 48343.8 +05/03/2016 14:22:32: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: ce = 1.12716675 * 2560; err = 0.34414062 * 2560; time = 0.0514s; samplesPerSecond = 49773.5 +05/03/2016 14:22:32: Finished Epoch[ 1 of 4]: [Training] ce = 1.40821428 * 81920; err = 0.40085449 * 81920; totalSamplesSeen = 81920; learningRatePerSample = 0.003125; epochTime=1.85405s +05/03/2016 14:22:32: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.1' -05/03/2016 14:26:07: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +05/03/2016 14:22:32: Starting Epoch 2: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +minibatchiterator: epoch 1: frames [81920..163840] (first utterance at frame 81920), data subset 0 of 1, with 1 datapasses -05/03/2016 14:26:07: Starting minibatch loop. 
-05/03/2016 14:26:07: Epoch[ 2 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.20089607 * 5120; err = 0.36757812 * 5120; time = 0.0991s; samplesPerSecond = 51644.1 -05/03/2016 14:26:07: Epoch[ 2 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.15295639 * 5120; err = 0.34550781 * 5120; time = 0.0853s; samplesPerSecond = 60041.7 -05/03/2016 14:26:07: Epoch[ 2 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.09945831 * 5120; err = 0.33613281 * 5120; time = 0.0855s; samplesPerSecond = 59871.8 -05/03/2016 14:26:07: Epoch[ 2 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 1.09916496 * 5120; err = 0.33867188 * 5120; time = 0.0848s; samplesPerSecond = 60410.1 -05/03/2016 14:26:07: Epoch[ 2 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 1.17260475 * 5120; err = 0.36230469 * 5120; time = 0.0846s; samplesPerSecond = 60505.8 -05/03/2016 14:26:07: Epoch[ 2 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.15717888 * 5120; err = 0.35820313 * 5120; time = 0.0853s; samplesPerSecond = 60037.5 -05/03/2016 14:26:07: Epoch[ 2 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 1.14431229 * 5120; err = 0.34296875 * 5120; time = 0.0853s; samplesPerSecond = 60049.5 -05/03/2016 14:26:07: Epoch[ 2 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.10515747 * 5120; err = 0.34394531 * 5120; time = 0.0852s; samplesPerSecond = 60065.0 -05/03/2016 14:26:07: Epoch[ 2 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 1.15175400 * 5120; err = 0.35449219 * 5120; time = 0.0852s; samplesPerSecond = 60108.7 -05/03/2016 14:26:07: Epoch[ 2 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 1.11654053 * 5120; err = 0.34101562 * 5120; time = 0.0852s; samplesPerSecond = 60079.8 -05/03/2016 14:26:08: Epoch[ 2 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 1.11851807 * 5120; err = 0.34472656 * 5120; time = 0.0853s; samplesPerSecond = 60050.9 -05/03/2016 14:26:08: Epoch[ 2 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 1.11374130 * 5120; err = 0.34492187 * 5120; time = 0.0851s; samplesPerSecond = 60134.8 -05/03/2016 14:26:08: Epoch[ 2 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 1.04686737 * 5120; err = 0.32265625 * 5120; time = 0.0852s; samplesPerSecond = 60125.7 -05/03/2016 14:26:08: Epoch[ 2 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 1.02721252 * 5120; err = 0.32246094 * 5120; time = 0.0853s; samplesPerSecond = 60053.7 -05/03/2016 14:26:08: Epoch[ 2 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 1.08386230 * 5120; err = 0.33144531 * 5120; time = 0.0847s; samplesPerSecond = 60420.8 -05/03/2016 14:26:08: Epoch[ 2 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 1.06164856 * 5120; err = 0.32558594 * 5120; time = 0.0854s; samplesPerSecond = 59939.1 -05/03/2016 14:26:08: Finished Epoch[ 2 of 4]: [Training] ce = 1.11574211 * 81920; err = 0.34266357 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=1.38479s -05/03/2016 14:26:08: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.2' +05/03/2016 14:22:32: Starting minibatch loop. 
+05/03/2016 14:22:32: Epoch[ 2 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.20089607 * 5120; err = 0.36757812 * 5120; time = 0.0980s; samplesPerSecond = 52218.8 +05/03/2016 14:22:32: Epoch[ 2 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.15295639 * 5120; err = 0.34550781 * 5120; time = 0.0846s; samplesPerSecond = 60537.3 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.09945831 * 5120; err = 0.33613281 * 5120; time = 0.0849s; samplesPerSecond = 60300.6 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 1.09916496 * 5120; err = 0.33867188 * 5120; time = 0.0846s; samplesPerSecond = 60499.4 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 1.17260475 * 5120; err = 0.36230469 * 5120; time = 0.0847s; samplesPerSecond = 60469.3 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.15717888 * 5120; err = 0.35820313 * 5120; time = 0.0849s; samplesPerSecond = 60316.9 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 1.14431229 * 5120; err = 0.34296875 * 5120; time = 0.0849s; samplesPerSecond = 60280.7 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.10515747 * 5120; err = 0.34394531 * 5120; time = 0.0847s; samplesPerSecond = 60423.0 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 1.15175400 * 5120; err = 0.35449219 * 5120; time = 0.0848s; samplesPerSecond = 60368.8 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 1.11654053 * 5120; err = 0.34101562 * 5120; time = 0.0838s; samplesPerSecond = 61102.2 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 1.11851807 * 5120; err = 0.34472656 * 5120; time = 0.0843s; samplesPerSecond = 60757.1 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 1.11374130 * 5120; err = 0.34492187 * 5120; time = 0.0849s; samplesPerSecond = 60314.8 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 1.04686737 * 5120; err = 0.32265625 * 5120; time = 0.0849s; samplesPerSecond = 60297.0 +05/03/2016 14:22:33: Epoch[ 2 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 1.02721252 * 5120; err = 0.32246094 * 5120; time = 0.0846s; samplesPerSecond = 60497.9 +05/03/2016 14:22:34: Epoch[ 2 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 1.08386230 * 5120; err = 0.33144531 * 5120; time = 0.0847s; samplesPerSecond = 60477.9 +05/03/2016 14:22:34: Epoch[ 2 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 1.06164856 * 5120; err = 0.32558594 * 5120; time = 0.0849s; samplesPerSecond = 60273.6 +05/03/2016 14:22:34: Finished Epoch[ 2 of 4]: [Training] ce = 1.11574211 * 81920; err = 0.34266357 * 81920; totalSamplesSeen = 163840; learningRatePerSample = 0.003125; epochTime=1.37117s +05/03/2016 14:22:34: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.2' -05/03/2016 14:26:08: Starting Epoch 3: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +05/03/2016 14:22:34: Starting Epoch 3: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +minibatchiterator: epoch 2: frames [163840..245760] (first utterance at frame 163840), data subset 0 of 1, with 1 datapasses -05/03/2016 14:26:08: Starting minibatch loop. 
-05/03/2016 14:26:08: Epoch[ 3 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.12331724 * 5120; err = 0.34121094 * 5120; time = 0.0870s; samplesPerSecond = 58862.1 -05/03/2016 14:26:08: Epoch[ 3 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.07871084 * 5120; err = 0.33652344 * 5120; time = 0.0853s; samplesPerSecond = 60035.4 -05/03/2016 14:26:08: Epoch[ 3 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.06784954 * 5120; err = 0.33183594 * 5120; time = 0.0854s; samplesPerSecond = 59963.7 -05/03/2016 14:26:08: Epoch[ 3 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 1.08440666 * 5120; err = 0.33398438 * 5120; time = 0.0853s; samplesPerSecond = 60046.7 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 1.07466812 * 5120; err = 0.33320312 * 5120; time = 0.0850s; samplesPerSecond = 60258.0 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.05427513 * 5120; err = 0.33125000 * 5120; time = 0.0853s; samplesPerSecond = 60002.3 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 1.06873093 * 5120; err = 0.32773438 * 5120; time = 0.0842s; samplesPerSecond = 60809.8 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.08097610 * 5120; err = 0.33007813 * 5120; time = 0.0843s; samplesPerSecond = 60752.8 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 1.05431290 * 5120; err = 0.32792969 * 5120; time = 0.0852s; samplesPerSecond = 60107.3 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 1.06173096 * 5120; err = 0.32695313 * 5120; time = 0.0852s; samplesPerSecond = 60097.4 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 1.04505692 * 5120; err = 0.32792969 * 5120; time = 0.0842s; samplesPerSecond = 60788.8 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 1.08151245 * 5120; err = 0.33574219 * 5120; time = 0.0851s; samplesPerSecond = 60157.4 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 1.10628204 * 5120; err = 0.33437500 * 5120; time = 0.0852s; samplesPerSecond = 60098.1 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 1.05827026 * 5120; err = 0.32636719 * 5120; time = 0.0850s; samplesPerSecond = 60223.2 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 1.05841064 * 5120; err = 0.33574219 * 5120; time = 0.0842s; samplesPerSecond = 60802.5 -05/03/2016 14:26:09: Epoch[ 3 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 1.04437714 * 5120; err = 0.32773438 * 5120; time = 0.0880s; samplesPerSecond = 58171.9 -05/03/2016 14:26:09: Finished Epoch[ 3 of 4]: [Training] ce = 1.07143049 * 81920; err = 0.33178711 * 81920; totalSamplesSeen = 245760; learningRatePerSample = 0.003125; epochTime=1.37098s -05/03/2016 14:26:09: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.3' +05/03/2016 14:22:34: Starting minibatch loop. 
+05/03/2016 14:22:34: Epoch[ 3 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.12331724 * 5120; err = 0.34121094 * 5120; time = 0.0855s; samplesPerSecond = 59883.7 +05/03/2016 14:22:34: Epoch[ 3 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.07871084 * 5120; err = 0.33652344 * 5120; time = 0.0846s; samplesPerSecond = 60534.4 +05/03/2016 14:22:34: Epoch[ 3 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.06784954 * 5120; err = 0.33183594 * 5120; time = 0.0843s; samplesPerSecond = 60752.8 +05/03/2016 14:22:34: Epoch[ 3 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 1.08440666 * 5120; err = 0.33398438 * 5120; time = 0.0848s; samplesPerSecond = 60379.5 +05/03/2016 14:22:34: Epoch[ 3 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 1.07466812 * 5120; err = 0.33320312 * 5120; time = 0.0841s; samplesPerSecond = 60845.2 +05/03/2016 14:22:34: Epoch[ 3 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.05427513 * 5120; err = 0.33125000 * 5120; time = 0.0842s; samplesPerSecond = 60830.0 +05/03/2016 14:22:34: Epoch[ 3 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 1.06873093 * 5120; err = 0.32773438 * 5120; time = 0.0847s; samplesPerSecond = 60432.2 +05/03/2016 14:22:34: Epoch[ 3 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.08097610 * 5120; err = 0.33007813 * 5120; time = 0.0846s; samplesPerSecond = 60492.2 +05/03/2016 14:22:34: Epoch[ 3 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 1.05431290 * 5120; err = 0.32792969 * 5120; time = 0.0847s; samplesPerSecond = 60481.5 +05/03/2016 14:22:35: Epoch[ 3 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 1.06173096 * 5120; err = 0.32695313 * 5120; time = 0.0848s; samplesPerSecond = 60343.9 +05/03/2016 14:22:35: Epoch[ 3 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 1.04505692 * 5120; err = 0.32792969 * 5120; time = 0.0846s; samplesPerSecond = 60539.4 +05/03/2016 14:22:35: Epoch[ 3 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 1.08151245 * 5120; err = 0.33574219 * 5120; time = 0.0847s; samplesPerSecond = 60435.8 +05/03/2016 14:22:35: Epoch[ 3 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 1.10628204 * 5120; err = 0.33437500 * 5120; time = 0.0848s; samplesPerSecond = 60362.4 +05/03/2016 14:22:35: Epoch[ 3 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 1.05827026 * 5120; err = 0.32636719 * 5120; time = 0.0847s; samplesPerSecond = 60455.8 +05/03/2016 14:22:35: Epoch[ 3 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 1.05841064 * 5120; err = 0.33574219 * 5120; time = 0.0843s; samplesPerSecond = 60735.5 +05/03/2016 14:22:35: Epoch[ 3 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 1.04437714 * 5120; err = 0.32773438 * 5120; time = 0.0847s; samplesPerSecond = 60445.1 +05/03/2016 14:22:35: Finished Epoch[ 3 of 4]: [Training] ce = 1.07143049 * 81920; err = 0.33178711 * 81920; totalSamplesSeen = 245760; learningRatePerSample = 0.003125; epochTime=1.35706s +05/03/2016 14:22:35: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech.3' -05/03/2016 14:26:10: Starting Epoch 4: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +05/03/2016 14:22:35: Starting Epoch 4: learning rate per sample = 0.003125 effective momentum = 0.810210 momentum as time constant = 2432.7 samples +minibatchiterator: epoch 3: frames [245760..327680] (first utterance at frame 245760), data subset 0 of 1, with 1 datapasses -05/03/2016 14:26:10: Starting minibatch loop. 
-05/03/2016 14:26:10: Epoch[ 4 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.04450397 * 5120; err = 0.33125000 * 5120; time = 0.0869s; samplesPerSecond = 58945.4 -05/03/2016 14:26:10: Epoch[ 4 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.02895847 * 4926; err = 0.31567194 * 4926; time = 0.2770s; samplesPerSecond = 17783.2 -05/03/2016 14:26:10: Epoch[ 4 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.00198059 * 5120; err = 0.31601563 * 5120; time = 0.0840s; samplesPerSecond = 60980.7 -05/03/2016 14:26:10: Epoch[ 4 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 1.00561562 * 5120; err = 0.31777344 * 5120; time = 0.0840s; samplesPerSecond = 60974.9 -05/03/2016 14:26:10: Epoch[ 4 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 1.00148926 * 5120; err = 0.31601563 * 5120; time = 0.0840s; samplesPerSecond = 60981.4 -05/03/2016 14:26:10: Epoch[ 4 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.00593376 * 5120; err = 0.31406250 * 5120; time = 0.0843s; samplesPerSecond = 60766.5 -05/03/2016 14:26:10: Epoch[ 4 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 0.98752327 * 5120; err = 0.30722656 * 5120; time = 0.0846s; samplesPerSecond = 60496.5 -05/03/2016 14:26:10: Epoch[ 4 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.01428757 * 5120; err = 0.31992188 * 5120; time = 0.0864s; samplesPerSecond = 59241.4 -05/03/2016 14:26:11: Epoch[ 4 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 0.99691544 * 5120; err = 0.31621094 * 5120; time = 0.0844s; samplesPerSecond = 60631.9 -05/03/2016 14:26:11: Epoch[ 4 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 0.96604996 * 5120; err = 0.30937500 * 5120; time = 0.0845s; samplesPerSecond = 60583.8 -05/03/2016 14:26:11: Epoch[ 4 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 0.99062958 * 5120; err = 0.30527344 * 5120; time = 0.0848s; samplesPerSecond = 60356.7 -05/03/2016 14:26:11: Epoch[ 4 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 0.99886856 * 5120; err = 0.30976562 * 5120; time = 0.0844s; samplesPerSecond = 60638.4 -05/03/2016 14:26:11: Epoch[ 4 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 1.00958328 * 5120; err = 0.31523438 * 5120; time = 0.0847s; samplesPerSecond = 60461.5 -05/03/2016 14:26:11: Epoch[ 4 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 0.97942047 * 5120; err = 0.31171875 * 5120; time = 0.0844s; samplesPerSecond = 60647.0 -05/03/2016 14:26:11: Epoch[ 4 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 0.94226837 * 5120; err = 0.30136719 * 5120; time = 0.0842s; samplesPerSecond = 60788.8 -05/03/2016 14:26:11: Epoch[ 4 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 0.96711578 * 5120; err = 0.30175781 * 5120; time = 0.0853s; samplesPerSecond = 60042.5 -05/03/2016 14:26:11: Finished Epoch[ 4 of 4]: [Training] ce = 0.99611807 * 81920; err = 0.31303711 * 81920; totalSamplesSeen = 327680; learningRatePerSample = 0.003125; epochTime=1.55959s -05/03/2016 14:26:11: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech' -05/03/2016 14:26:11: CNTKCommandTrainEnd: speechTrain +05/03/2016 14:22:35: Starting minibatch loop. 
+05/03/2016 14:22:35: Epoch[ 4 of 4]-Minibatch[ 1- 10, 6.25%]: ce = 1.04450397 * 5120; err = 0.33125000 * 5120; time = 0.0858s; samplesPerSecond = 59670.9 +05/03/2016 14:22:35: Epoch[ 4 of 4]-Minibatch[ 11- 20, 12.50%]: ce = 1.02895847 * 4926; err = 0.31567194 * 4926; time = 0.1419s; samplesPerSecond = 34706.5 +05/03/2016 14:22:35: Epoch[ 4 of 4]-Minibatch[ 21- 30, 18.75%]: ce = 1.00198059 * 5120; err = 0.31601563 * 5120; time = 0.0850s; samplesPerSecond = 60266.5 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 31- 40, 25.00%]: ce = 1.00561562 * 5120; err = 0.31777344 * 5120; time = 0.0849s; samplesPerSecond = 60304.1 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 41- 50, 31.25%]: ce = 1.00148926 * 5120; err = 0.31601563 * 5120; time = 0.0841s; samplesPerSecond = 60913.9 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 51- 60, 37.50%]: ce = 1.00593376 * 5120; err = 0.31406250 * 5120; time = 0.0842s; samplesPerSecond = 60814.1 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 61- 70, 43.75%]: ce = 0.98752327 * 5120; err = 0.30722656 * 5120; time = 0.0850s; samplesPerSecond = 60265.8 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 71- 80, 50.00%]: ce = 1.01428757 * 5120; err = 0.31992188 * 5120; time = 0.0847s; samplesPerSecond = 60472.9 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 81- 90, 56.25%]: ce = 0.99691544 * 5120; err = 0.31621094 * 5120; time = 0.0848s; samplesPerSecond = 60383.1 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 91- 100, 62.50%]: ce = 0.96604996 * 5120; err = 0.30937500 * 5120; time = 0.0847s; samplesPerSecond = 60473.6 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 101- 110, 68.75%]: ce = 0.99062958 * 5120; err = 0.30527344 * 5120; time = 0.0847s; samplesPerSecond = 60458.6 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 111- 120, 75.00%]: ce = 0.99886856 * 5120; err = 0.30976562 * 5120; time = 0.0849s; samplesPerSecond = 60324.7 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 121- 130, 81.25%]: ce = 1.00958328 * 5120; err = 0.31523438 * 5120; time = 0.0847s; samplesPerSecond = 60462.9 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 131- 140, 87.50%]: ce = 0.97942047 * 5120; err = 0.31171875 * 5120; time = 0.0846s; samplesPerSecond = 60510.1 +05/03/2016 14:22:36: Epoch[ 4 of 4]-Minibatch[ 141- 150, 93.75%]: ce = 0.94226837 * 5120; err = 0.30136719 * 5120; time = 0.0838s; samplesPerSecond = 61108.1 +05/03/2016 14:22:37: Epoch[ 4 of 4]-Minibatch[ 151- 160, 100.00%]: ce = 0.96711578 * 5120; err = 0.30175781 * 5120; time = 0.0847s; samplesPerSecond = 60423.7 +05/03/2016 14:22:37: Finished Epoch[ 4 of 4]: [Training] ce = 0.99611807 * 81920; err = 0.31303711 * 81920; totalSamplesSeen = 327680; learningRatePerSample = 0.003125; epochTime=1.42049s +05/03/2016 14:22:37: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_DiscriminativePreTraining@release_gpu/models/cntkSpeech' +05/03/2016 14:22:37: CNTKCommandTrainEnd: speechTrain -05/03/2016 14:26:11: Action "train" complete. +05/03/2016 14:22:37: Action "train" complete. 
-05/03/2016 14:26:11: __COMPLETED__ \ No newline at end of file +05/03/2016 14:22:37: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/run-test b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/run-test index 48ebc1ec5..b3cf889da 100755 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/run-test +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/DiscriminativePreTraining/run-test @@ -5,5 +5,11 @@ OriginalTestDir=../../../DNN/DiscriminativePreTraining ConfigDir=$TEST_DIR/$OriginalTestDir +(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) +if [ $? != 0 ]; then + echo Error: Baselines must match original test. Copy from $OriginalTestDir. + exit 1 +fi + # cntkrun cntkrun cntk_dpt.cntk 'reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true]' || exit $? diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.cpu.txt index a63bdb5b7..e72e0803a 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.cpu.txt @@ -1,4 +1,4 @@ -=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr ------------------------------------------------------------------- Build info: @@ -65,26 +65,26 @@ ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other +ping [requestnodes (after change)]: all 3 nodes responded +mpihelper: we are cog 1 in a gearbox of 3 +ping [mpihelper]: 3 nodes pinging each other +ping [mpihelper]: all 3 nodes responded ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 0 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 1 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other -ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded +ping [requestnodes (after change)]: all 3 nodes responded +mpihelper: we are cog 0 in a gearbox of 3 +ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded -ping [mpihelper]: all 3 nodes responded -05/03/2016 18:03:27: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank0 -05/03/2016 18:03:28: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank1 -05/03/2016 18:03:28: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank2 +05/03/2016 18:00:03: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank0 +05/03/2016 18:00:04: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank1 +05/03/2016 18:00:04: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank2 -------------------------------------------------------------------------- -mpiexec has exited due to process rank 0 with PID 4951 on +mpiexec has exited due to process rank 0 with PID 3194 on node 87698aadbc9d exiting improperly. There are three reasons this could occur: 1. this process did not call "init" before exiting, but others in @@ -107,32 +107,32 @@ terminated by signals sent by mpiexec (as reported here). You can avoid this message by specifying -quiet on the mpiexec command line. 
-------------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:03:27: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:03:27: Build info: +MPI Rank 0: 05/03/2016 18:00:03: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:00:03: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: Built time: May 3 2016 17:56:15 -MPI Rank 0: 05/03/2016 18:03:27: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 0: 05/03/2016 18:03:27: Build type: release -MPI Rank 0: 05/03/2016 18:03:27: Build target: GPU -MPI Rank 0: 05/03/2016 18:03:27: With 1bit-SGD: yes -MPI Rank 0: 05/03/2016 18:03:27: Math lib: acml -MPI Rank 0: 05/03/2016 18:03:27: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 0: 05/03/2016 18:03:27: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 0: 05/03/2016 18:03:27: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 0: 05/03/2016 18:03:27: Build Branch: HEAD -MPI Rank 0: 05/03/2016 18:03:27: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 0: 05/03/2016 18:03:27: Built by philly on 87698aadbc9d -MPI Rank 0: 05/03/2016 18:03:27: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 0: 05/03/2016 18:03:27: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:00:03: Built time: May 3 2016 17:56:15 +MPI Rank 0: 05/03/2016 18:00:03: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 0: 05/03/2016 18:00:03: Build type: release +MPI Rank 0: 05/03/2016 18:00:03: Build target: GPU +MPI Rank 0: 05/03/2016 18:00:03: With 1bit-SGD: yes +MPI Rank 0: 05/03/2016 18:00:03: Math lib: acml +MPI Rank 0: 05/03/2016 18:00:03: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 0: 05/03/2016 18:00:03: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 0: 05/03/2016 18:00:03: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 0: 05/03/2016 18:00:03: Build Branch: HEAD +MPI Rank 0: 05/03/2016 18:00:03: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 0: 05/03/2016 18:00:03: Built by philly on 87698aadbc9d +MPI Rank 0: 05/03/2016 18:00:03: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 0: 05/03/2016 18:00:03: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: Running on localhost at 2016/05/03 18:03:27 -MPI Rank 0: 05/03/2016 18:03:27: Command line: -MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] 
stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 0: 05/03/2016 18:00:03: Running on localhost at 2016/05/03 18:00:03 +MPI Rank 0: 05/03/2016 18:00:03: Command line: +MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:03:27: precision = "float" +MPI Rank 0: 05/03/2016 18:00:03: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:00:03: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -222,30 +222,28 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:00:03: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:03:27: precision = "float" +MPI Rank 0: 05/03/2016 18:00:03: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:00:03: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = -1 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -329,36 +327,34 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:00:03: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:00:03: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN +MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. MPI Rank 0: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:deviceId=-1 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -440,35 +436,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 0: ] 
[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 18:03:27: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 18:03:27: Commands: speechTrain -MPI Rank 0: 05/03/2016 18:03:27: Precision = "double" -MPI Rank 0: 05/03/2016 18:03:27: Using 8 CPU threads. -MPI Rank 0: 05/03/2016 18:03:27: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 18:03:27: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 0: 05/03/2016 18:03:27: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 0: 05/03/2016 18:00:03: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:00:03: Commands: speechTrain +MPI Rank 0: 05/03/2016 18:00:03: Precision = "double" +MPI Rank 0: 05/03/2016 18:00:03: Using 8 CPU threads. +MPI Rank 0: 05/03/2016 18:00:03: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 18:00:03: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 0: 05/03/2016 18:00:03: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: ############################################################################## -MPI Rank 0: 05/03/2016 18:03:27: # # -MPI Rank 0: 05/03/2016 18:03:27: # Action "train" # -MPI Rank 0: 05/03/2016 18:03:27: # # -MPI Rank 0: 05/03/2016 18:03:27: ############################################################################## +MPI Rank 0: 05/03/2016 18:00:03: ############################################################################## +MPI Rank 0: 05/03/2016 18:00:03: # # +MPI Rank 0: 05/03/2016 18:00:03: # Action "train" # +MPI Rank 0: 05/03/2016 18:00:03: # # +MPI Rank 0: 05/03/2016 18:00:03: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 18:00:03: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 0: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... 
total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: Creating virgin network. +MPI Rank 0: 05/03/2016 18:00:03: Creating virgin network. MPI Rank 0: MPI Rank 0: Post-processing network... MPI Rank 0: @@ -520,14 +515,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: Created model with 25 nodes on CPU. +MPI Rank 0: 05/03/2016 18:00:03: Created model with 25 nodes on CPU. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: Training criterion node(s): -MPI Rank 0: 05/03/2016 18:03:27: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 18:00:03: Training criterion node(s): +MPI Rank 0: 05/03/2016 18:00:03: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 18:00:03: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 18:00:03: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -535,135 +530,140 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0x2f28b88: {[W1 Value[512 x 512]] } -MPI Rank 0: 0x2f54288: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0x2fca3c8: {[W0 Value[512 x 363]] } -MPI Rank 0: 0x2fd7148: {[features Value[363 x *]] } -MPI Rank 0: 0x2fd7738: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0x2fe7078: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 0x2fe7238: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0x2fe73f8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0x2ffe228: {[labels Value[132 x *]] } -MPI Rank 0: 0x30028e8: {[B0 Value[512 x 1]] } -MPI Rank 0: 0x300dc78: {[B1 Value[512 x 1]] } -MPI Rank 0: 0x300fd18: {[W2 Value[132 x 512]] } -MPI Rank 0: 0x30269f8: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0x302a358: {[LogOfPrior Value[132]] } -MPI Rank 0: 0x302b258: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0x302f6b8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0x302f878: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0x302fa38: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 0x3034f68: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0x304c3d8: {[W0*features Value[512 x *]] } -MPI Rank 0: 0x304c5e8: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0x304d8f8: {[MeanOfFeatures 
Value[363]] } -MPI Rank 0: 0x304f1c8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0x3051b18: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0x3051cd8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0x3051e98: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0x30567a8: {[Prior Value[132]] } -MPI Rank 0: 0x3058708: {[B2 Value[132 x 1]] } +MPI Rank 0: 0x18ed2b8: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 0x18ef6b8: {[B2 Value[132 x 1]] } +MPI Rank 0: 0x18efd18: {[features Value[363 x *]] } +MPI Rank 0: 0x18f2638: {[W1 Value[512 x 512]] } +MPI Rank 0: 0x197ad48: {[W0*features Value[512 x *]] } +MPI Rank 0: 0x198ed68: {[labels Value[132 x *]] } +MPI Rank 0: 0x199bcf8: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 0x199beb8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 0x199c078: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 0x199c178: {[B2 Gradient[132 x 1]] } +MPI Rank 0: 0x19a0098: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 0x19a6408: {[W2 Value[132 x 512]] } +MPI Rank 0: 0x19c2cf8: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 0x19c2ee8: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 0x19c30a8: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 0x19c4568: {[Prior Value[132]] } +MPI Rank 0: 0x19cf138: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 0x19d91e8: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 0x19fe9e8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 0x19feba8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 0x19fed68: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 0x19fef28: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 0x19ff0e8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 0x1a03768: {[W0 Value[512 x 363]] } +MPI Rank 0: 0x1a13a48: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 0x1a13c38: {[LogOfPrior Value[132]] } +MPI Rank 0: 0x1a17e08: {[B1 Value[512 x 1]] } +MPI Rank 0: 0x1a19d68: {[B0 Value[512 x 1]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 18:00:03: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:27: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 18:03:27: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 18:03:27: Prior = Mean() +MPI Rank 0: 05/03/2016 18:00:03: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 18:00:03: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 18:00:03: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:33: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 18:00:04: Precomputing --> Completed. 
MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:35: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 18:00:06: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:35: Starting minibatch loop. -MPI Rank 0: 05/03/2016 18:03:35: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.4979s; samplesPerSecond = 1285.5 -MPI Rank 0: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.3761s; samplesPerSecond = 1701.6 -MPI Rank 0: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.3428s; samplesPerSecond = 1867.2 -MPI Rank 0: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.5019s; samplesPerSecond = 1275.1 -MPI Rank 0: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.3331s; samplesPerSecond = 1921.5 -MPI Rank 0: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3256s; samplesPerSecond = 1965.3 -MPI Rank 0: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.3740s; samplesPerSecond = 1711.1 -MPI Rank 0: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.3379s; samplesPerSecond = 1894.0 -MPI Rank 0: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3682s; samplesPerSecond = 1738.4 -MPI Rank 0: 05/03/2016 18:03:39: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.5351s; samplesPerSecond = 1196.1 -MPI Rank 0: 05/03/2016 18:03:39: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.4025s; samplesPerSecond = 1590.1 -MPI Rank 0: 05/03/2016 18:03:40: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3562s; samplesPerSecond = 1796.6 -MPI Rank 0: 05/03/2016 18:03:40: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.4270s; samplesPerSecond = 1498.7 -MPI Rank 0: 05/03/2016 18:03:40: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.3474s; samplesPerSecond = 1842.5 -MPI Rank 0: 05/03/2016 18:03:41: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; 
time = 0.5579s; samplesPerSecond = 1147.1 -MPI Rank 0: 05/03/2016 18:03:41: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.3275s; samplesPerSecond = 1954.1 -MPI Rank 0: 05/03/2016 18:03:42: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.3130s; samplesPerSecond = 2044.5 -MPI Rank 0: 05/03/2016 18:03:42: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3770s; samplesPerSecond = 1697.5 -MPI Rank 0: 05/03/2016 18:03:42: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.3642s; samplesPerSecond = 1757.5 -MPI Rank 0: 05/03/2016 18:03:43: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.3430s; samplesPerSecond = 1865.9 -MPI Rank 0: 05/03/2016 18:03:43: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.5543s; samplesPerSecond = 1154.6 -MPI Rank 0: 05/03/2016 18:03:44: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3284s; samplesPerSecond = 1948.7 -MPI Rank 0: 05/03/2016 18:03:44: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.3341s; samplesPerSecond = 1915.7 -MPI Rank 0: 05/03/2016 18:03:44: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.3803s; samplesPerSecond = 1682.9 -MPI Rank 0: 05/03/2016 18:03:45: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.3382s; samplesPerSecond = 1892.1 -MPI Rank 0: 05/03/2016 18:03:45: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.3296s; samplesPerSecond = 1941.9 -MPI Rank 0: 05/03/2016 18:03:45: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.5779s; samplesPerSecond = 1107.4 -MPI Rank 0: 05/03/2016 18:03:46: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.3444s; samplesPerSecond = 1858.3 -MPI Rank 0: 05/03/2016 18:03:46: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3405s; samplesPerSecond = 1879.5 -MPI Rank 0: 05/03/2016 18:03:46: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3262s; samplesPerSecond = 1962.3 -MPI Rank 0: 05/03/2016 18:03:47: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.3211s; samplesPerSecond = 1993.4 -MPI Rank 0: 05/03/2016 18:03:47: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.3283s; samplesPerSecond = 1949.4 -MPI Rank 0: 05/03/2016 
18:03:47: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=12.369s -MPI Rank 0: 05/03/2016 18:03:47: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 18:00:06: Starting minibatch loop. +MPI Rank 0: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.4061s; samplesPerSecond = 1575.9 +MPI Rank 0: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.3417s; samplesPerSecond = 1872.8 +MPI Rank 0: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.3770s; samplesPerSecond = 1697.5 +MPI Rank 0: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.3226s; samplesPerSecond = 1984.1 +MPI Rank 0: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.3339s; samplesPerSecond = 1916.5 +MPI Rank 0: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.5252s; samplesPerSecond = 1218.5 +MPI Rank 0: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.3740s; samplesPerSecond = 1711.3 +MPI Rank 0: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.4496s; samplesPerSecond = 1423.6 +MPI Rank 0: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3698s; samplesPerSecond = 1730.5 +MPI Rank 0: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.2683s; samplesPerSecond = 2385.6 +MPI Rank 0: 05/03/2016 18:00:10: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.5378s; samplesPerSecond = 1190.0 +MPI Rank 0: 05/03/2016 18:00:10: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3486s; samplesPerSecond = 1835.7 +MPI Rank 0: 05/03/2016 18:00:11: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.3375s; samplesPerSecond = 1896.5 +MPI Rank 0: 05/03/2016 18:00:11: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.3917s; samplesPerSecond = 1633.7 +MPI Rank 0: 05/03/2016 18:00:11: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.3869s; samplesPerSecond = 1654.3 +MPI Rank 0: 05/03/2016 18:00:12: 
Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.3243s; samplesPerSecond = 1973.3 +MPI Rank 0: 05/03/2016 18:00:12: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.5272s; samplesPerSecond = 1214.0 +MPI Rank 0: 05/03/2016 18:00:13: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3097s; samplesPerSecond = 2066.4 +MPI Rank 0: 05/03/2016 18:00:13: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.3668s; samplesPerSecond = 1744.9 +MPI Rank 0: 05/03/2016 18:00:13: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.3559s; samplesPerSecond = 1798.1 +MPI Rank 0: 05/03/2016 18:00:14: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.3450s; samplesPerSecond = 1855.3 +MPI Rank 0: 05/03/2016 18:00:14: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3847s; samplesPerSecond = 1663.5 +MPI Rank 0: 05/03/2016 18:00:15: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.5093s; samplesPerSecond = 1256.6 +MPI Rank 0: 05/03/2016 18:00:15: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.3401s; samplesPerSecond = 1881.9 +MPI Rank 0: 05/03/2016 18:00:15: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.3194s; samplesPerSecond = 2003.5 +MPI Rank 0: 05/03/2016 18:00:16: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.4304s; samplesPerSecond = 1487.1 +MPI Rank 0: 05/03/2016 18:00:16: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.3574s; samplesPerSecond = 1790.5 +MPI Rank 0: 05/03/2016 18:00:16: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.2866s; samplesPerSecond = 2233.0 +MPI Rank 0: 05/03/2016 18:00:17: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.4853s; samplesPerSecond = 1318.7 +MPI Rank 0: 05/03/2016 18:00:17: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.2957s; samplesPerSecond = 2164.6 +MPI Rank 0: 05/03/2016 18:00:17: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.3328s; samplesPerSecond = 1922.9 +MPI Rank 0: 05/03/2016 18:00:18: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.1055s; samplesPerSecond = 6067.4 +MPI Rank 0: 05/03/2016 18:00:18: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 
3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=11.853s +MPI Rank 0: 05/03/2016 18:00:18: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:47: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 18:00:18: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:47: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 0: 05/03/2016 18:03:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08083820 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.8224s; samplesPerSecond = 3112.9 -MPI Rank 0: 05/03/2016 18:03:49: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05756240 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 1.0010s; samplesPerSecond = 2557.5 -MPI Rank 0: 05/03/2016 18:03:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.03869542 * 2560; EvalErrorPrediction = 0.55703125 * 2560; time = 0.9564s; samplesPerSecond = 2676.8 -MPI Rank 0: 05/03/2016 18:03:51: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01921890 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.9599s; samplesPerSecond = 2666.9 -MPI Rank 0: 05/03/2016 18:03:52: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.98871438 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 1.1041s; samplesPerSecond = 2318.7 -MPI Rank 0: 05/03/2016 18:03:53: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06763953 * 2560; EvalErrorPrediction = 0.56093750 * 2560; time = 0.9941s; samplesPerSecond = 2575.2 -MPI Rank 0: 05/03/2016 18:03:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02343111 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.8180s; samplesPerSecond = 3129.6 -MPI Rank 0: 05/03/2016 18:03:55: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.10504153 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.8241s; samplesPerSecond = 3106.4 -MPI Rank 0: 05/03/2016 18:03:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04764268 * 20480; EvalErrorPrediction = 0.56328125 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.50583s -MPI Rank 0: 05/03/2016 18:03:55: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn.2' +MPI Rank 0: 05/03/2016 18:00:18: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 0: 05/03/2016 18:00:18: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08083820 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.6801s; samplesPerSecond = 3764.3 +MPI Rank 0: 05/03/2016 18:00:19: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05756240 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.8514s; samplesPerSecond = 3006.9 +MPI Rank 0: 05/03/2016 18:00:20: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.03869542 * 2560; EvalErrorPrediction = 0.55703125 * 2560; time = 0.9545s; samplesPerSecond = 2682.0 +MPI Rank 0: 05/03/2016 18:00:21: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01921890 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.9750s; samplesPerSecond = 2625.7 +MPI Rank 0: 05/03/2016 18:00:22: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.98871438 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 0.6952s; samplesPerSecond = 3682.3 +MPI Rank 0: 05/03/2016 18:00:23: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06763953 * 2560; EvalErrorPrediction = 0.56093750 * 2560; time = 0.7721s; samplesPerSecond = 3315.7 +MPI Rank 0: 05/03/2016 18:00:23: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02343111 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.9461s; samplesPerSecond = 2706.0 +MPI Rank 0: 05/03/2016 18:00:24: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.10504153 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.9020s; samplesPerSecond = 2838.3 +MPI Rank 0: 05/03/2016 18:00:24: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04764268 * 20480; EvalErrorPrediction = 0.56328125 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.81334s +MPI Rank 0: 05/03/2016 18:00:24: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 18:00:25: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 18:03:57: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95696442 * 10240; EvalErrorPrediction = 0.53564453 * 10240; time = 2.1483s; samplesPerSecond = 4766.6 -MPI Rank 0: 05/03/2016 18:03:59: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93975925 * 10240; EvalErrorPrediction = 0.53330078 * 10240; time = 2.1871s; samplesPerSecond = 4682.1 -MPI Rank 0: 05/03/2016 18:03:59: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.94836183 * 20480; EvalErrorPrediction = 0.53447266 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.36319s -MPI Rank 0: 05/03/2016 18:03:59: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 18:03:59: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 18:00:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 18:00:27: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95696442 * 10240; EvalErrorPrediction = 0.53564453 * 10240; time = 2.3141s; samplesPerSecond = 4425.1 +MPI Rank 0: 05/03/2016 18:00:29: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93975925 * 10240; EvalErrorPrediction = 0.53330078 * 10240; time = 2.1273s; samplesPerSecond = 4813.6 +MPI Rank 0: 05/03/2016 18:00:29: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.94836183 * 20480; EvalErrorPrediction = 0.53447266 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.47587s +MPI Rank 0: 05/03/2016 18:00:29: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 18:00:29: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:59: Action "train" complete. +MPI Rank 0: 05/03/2016 18:00:29: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 18:03:59: __COMPLETED__ -MPI Rank 1: 05/03/2016 18:03:28: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 18:03:28: Build info: +MPI Rank 0: 05/03/2016 18:00:29: __COMPLETED__ +MPI Rank 1: 05/03/2016 18:00:04: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:00:04: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: Built time: May 3 2016 17:56:15 -MPI Rank 1: 05/03/2016 18:03:28: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 1: 05/03/2016 18:03:28: Build type: release -MPI Rank 1: 05/03/2016 18:03:28: Build target: GPU -MPI Rank 1: 05/03/2016 18:03:28: With 1bit-SGD: yes -MPI Rank 1: 05/03/2016 18:03:28: Math lib: acml -MPI Rank 1: 05/03/2016 18:03:28: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 1: 05/03/2016 18:03:28: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 1: 05/03/2016 18:03:28: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 1: 05/03/2016 18:03:28: Build Branch: HEAD -MPI Rank 1: 05/03/2016 18:03:28: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 1: 05/03/2016 18:03:28: Built by philly on 87698aadbc9d -MPI Rank 1: 05/03/2016 18:03:28: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 1: 05/03/2016 18:03:28: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:00:04: Built time: May 3 2016 17:56:15 +MPI Rank 1: 05/03/2016 18:00:04: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 1: 05/03/2016 18:00:04: Build type: release +MPI Rank 1: 05/03/2016 18:00:04: Build target: GPU +MPI Rank 1: 05/03/2016 18:00:04: With 1bit-SGD: yes +MPI Rank 1: 05/03/2016 18:00:04: Math lib: acml +MPI Rank 1: 05/03/2016 18:00:04: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 1: 05/03/2016 18:00:04: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 1: 05/03/2016 18:00:04: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 1: 05/03/2016 18:00:04: Build Branch: HEAD +MPI Rank 1: 05/03/2016 18:00:04: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 1: 05/03/2016 18:00:04: Built by philly on 87698aadbc9d +MPI Rank 1: 05/03/2016 18:00:04: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 1: 05/03/2016 18:00:04: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: Running on localhost at 2016/05/03 18:03:28 -MPI Rank 1: 05/03/2016 18:03:28: Command line: -MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] 
speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 1: 05/03/2016 18:00:04: Running on localhost at 2016/05/03 18:00:04 +MPI Rank 1: 05/03/2016 18:00:04: Command line: +MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:03:28: precision = "float" +MPI Rank 1: 05/03/2016 18:00:04: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:00:04: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -753,30 +753,28 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:00:04: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:03:28: precision = "float" +MPI Rank 1: 05/03/2016 18:00:04: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:00:04: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = -1 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -860,36 +858,34 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:00:04: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:00:04: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN +MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. MPI Rank 1: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:deviceId=-1 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -971,35 +967,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 1: ] 
[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 18:03:28: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 18:03:28: Commands: speechTrain -MPI Rank 1: 05/03/2016 18:03:28: Precision = "double" -MPI Rank 1: 05/03/2016 18:03:28: Using 8 CPU threads. -MPI Rank 1: 05/03/2016 18:03:28: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 18:03:28: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 1: 05/03/2016 18:03:28: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 1: 05/03/2016 18:00:04: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:00:04: Commands: speechTrain +MPI Rank 1: 05/03/2016 18:00:04: Precision = "double" +MPI Rank 1: 05/03/2016 18:00:04: Using 8 CPU threads. +MPI Rank 1: 05/03/2016 18:00:04: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 18:00:04: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 1: 05/03/2016 18:00:04: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: ############################################################################## -MPI Rank 1: 05/03/2016 18:03:28: # # -MPI Rank 1: 05/03/2016 18:03:28: # Action "train" # -MPI Rank 1: 05/03/2016 18:03:28: # # -MPI Rank 1: 05/03/2016 18:03:28: ############################################################################## +MPI Rank 1: 05/03/2016 18:00:04: ############################################################################## +MPI Rank 1: 05/03/2016 18:00:04: # # +MPI Rank 1: 05/03/2016 18:00:04: # Action "train" # +MPI Rank 1: 05/03/2016 18:00:04: # # +MPI Rank 1: 05/03/2016 18:00:04: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 18:00:04: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 1: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... 
total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: Creating virgin network. +MPI Rank 1: 05/03/2016 18:00:04: Creating virgin network. MPI Rank 1: MPI Rank 1: Post-processing network... MPI Rank 1: @@ -1051,14 +1046,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: Created model with 25 nodes on CPU. +MPI Rank 1: 05/03/2016 18:00:04: Created model with 25 nodes on CPU. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: Training criterion node(s): -MPI Rank 1: 05/03/2016 18:03:28: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 18:00:04: Training criterion node(s): +MPI Rank 1: 05/03/2016 18:00:04: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 18:00:04: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 18:00:04: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1066,132 +1061,137 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 0x176fbb8: {[B0 Value[512 x 1]] } -MPI Rank 1: 0x1780c58: {[Prior Value[132]] } -MPI Rank 1: 0x17839f8: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 0x1783c78: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 0x1783e38: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 0x1797cf8: {[LogOfPrior Value[132]] } -MPI Rank 1: 0x17be0e8: {[features Value[363 x *]] } -MPI Rank 1: 0x17c64a8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 0x180f058: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 0x180f5b8: {[W0*features Value[512 x *]] } -MPI Rank 1: 0x180f6e8: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 0x1835b18: {[B1 Value[512 x 1]] } -MPI Rank 1: 0x1865498: {[labels Value[132 x *]] } -MPI Rank 1: 0x1880498: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 0x1883038: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 0x18832e8: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 0x18834a8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 0x1883668: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 0x1884b48: {[W1 Value[512 x 512]] } -MPI Rank 1: 0x189c6c8: {[W2 Value[132 x 512]] } -MPI Rank 1: 0x189f1c8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 0x189f388: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 
x 1 x *]] } -MPI Rank 1: 0x189f548: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 0x189f648: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 0x18a9898: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 0x18a9a28: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 0x18aac88: {[W0 Value[512 x 363]] } -MPI Rank 1: 0x18ade38: {[B2 Value[132 x 1]] } +MPI Rank 1: 0x2cb4ff8: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 0x2cb5228: {[B2 Value[132 x 1]] } +MPI Rank 1: 0x2cb5948: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 0x2cb6538: {[B0 Value[512 x 1]] } +MPI Rank 1: 0x2cb8cc8: {[B1 Value[512 x 1]] } +MPI Rank 1: 0x2d375d8: {[W1 Value[512 x 512]] } +MPI Rank 1: 0x2d3a598: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 0x2d40558: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 0x2d40718: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 0x2d40ba8: {[W0*features Value[512 x *]] } +MPI Rank 1: 0x2d6c0e8: {[W2 Value[132 x 512]] } +MPI Rank 1: 0x2d8a0c8: {[Prior Value[132]] } +MPI Rank 1: 0x2d952f8: {[features Value[363 x *]] } +MPI Rank 1: 0x2d953d8: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 0x2d9b0c8: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 0x2d9b288: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 0x2d9b448: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 0x2db1ad8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 0x2db1c38: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 0x2db1df8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 0x2db1fb8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 0x2db7c08: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 0x2db7cc8: {[LogOfPrior Value[132]] } +MPI Rank 1: 0x2db8928: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 0x2db8b38: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 0x2db8cf8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 0x2dd8d78: {[W0 Value[512 x 363]] } +MPI Rank 1: 0x2ddcf28: {[labels Value[132 x *]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 18:00:04: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:28: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 18:03:28: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 18:03:28: Prior = Mean() +MPI Rank 1: 05/03/2016 18:00:04: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 18:00:04: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 18:00:04: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:29: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 18:00:05: Precomputing --> Completed. 
MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:35: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 18:00:06: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:35: Starting minibatch loop. -MPI Rank 1: 05/03/2016 18:03:35: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.1951s; samplesPerSecond = 3279.9 -MPI Rank 1: 05/03/2016 18:03:35: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.1053s; samplesPerSecond = 6078.9 -MPI Rank 1: 05/03/2016 18:03:35: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1044s; samplesPerSecond = 6128.9 -MPI Rank 1: 05/03/2016 18:03:35: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.1045s; samplesPerSecond = 6124.2 -MPI Rank 1: 05/03/2016 18:03:35: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.1051s; samplesPerSecond = 6089.0 -MPI Rank 1: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1050s; samplesPerSecond = 6097.4 -MPI Rank 1: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.1048s; samplesPerSecond = 6105.5 -MPI Rank 1: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.1051s; samplesPerSecond = 6091.6 -MPI Rank 1: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1048s; samplesPerSecond = 6106.1 -MPI Rank 1: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.1047s; samplesPerSecond = 6113.4 -MPI Rank 1: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.1081s; samplesPerSecond = 5922.1 -MPI Rank 1: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.1049s; samplesPerSecond = 6100.4 -MPI Rank 1: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.1046s; samplesPerSecond = 6120.9 -MPI Rank 1: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.1048s; samplesPerSecond = 6109.3 -MPI Rank 1: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; 
time = 0.1044s; samplesPerSecond = 6127.7 -MPI Rank 1: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.1050s; samplesPerSecond = 6097.0 -MPI Rank 1: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1048s; samplesPerSecond = 6106.6 -MPI Rank 1: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.1044s; samplesPerSecond = 6127.5 -MPI Rank 1: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.1047s; samplesPerSecond = 6115.4 -MPI Rank 1: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.1049s; samplesPerSecond = 6102.7 -MPI Rank 1: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.1050s; samplesPerSecond = 6093.7 -MPI Rank 1: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.1046s; samplesPerSecond = 6120.2 -MPI Rank 1: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.1047s; samplesPerSecond = 6115.3 -MPI Rank 1: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1046s; samplesPerSecond = 6117.3 -MPI Rank 1: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.1048s; samplesPerSecond = 6107.0 -MPI Rank 1: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.1049s; samplesPerSecond = 6099.6 -MPI Rank 1: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.1051s; samplesPerSecond = 6092.2 -MPI Rank 1: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.1049s; samplesPerSecond = 6100.8 -MPI Rank 1: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.1047s; samplesPerSecond = 6110.6 -MPI Rank 1: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.1046s; samplesPerSecond = 6120.1 -MPI Rank 1: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.1046s; samplesPerSecond = 6115.9 -MPI Rank 1: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.1048s; samplesPerSecond = 6108.7 -MPI Rank 1: 05/03/2016 
18:03:38: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.49341s +MPI Rank 1: 05/03/2016 18:00:06: Starting minibatch loop. +MPI Rank 1: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.1096s; samplesPerSecond = 5840.6 +MPI Rank 1: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.1043s; samplesPerSecond = 6136.2 +MPI Rank 1: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1052s; samplesPerSecond = 6085.5 +MPI Rank 1: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.1054s; samplesPerSecond = 6069.2 +MPI Rank 1: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.1055s; samplesPerSecond = 6066.1 +MPI Rank 1: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1048s; samplesPerSecond = 6107.7 +MPI Rank 1: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.1062s; samplesPerSecond = 6028.2 +MPI Rank 1: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.1058s; samplesPerSecond = 6048.6 +MPI Rank 1: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1051s; samplesPerSecond = 6092.2 +MPI Rank 1: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.1060s; samplesPerSecond = 6039.6 +MPI Rank 1: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.1051s; samplesPerSecond = 6090.6 +MPI Rank 1: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.1053s; samplesPerSecond = 6076.8 +MPI Rank 1: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.1055s; samplesPerSecond = 6063.8 +MPI Rank 1: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.1050s; samplesPerSecond = 6093.1 +MPI Rank 1: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.1051s; samplesPerSecond = 6087.8 +MPI Rank 1: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.1054s; samplesPerSecond = 6074.2 +MPI Rank 1: 05/03/2016 
18:00:08: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1058s; samplesPerSecond = 6050.8 +MPI Rank 1: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.1054s; samplesPerSecond = 6070.3 +MPI Rank 1: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.1057s; samplesPerSecond = 6055.8 +MPI Rank 1: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.1074s; samplesPerSecond = 5960.4 +MPI Rank 1: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.1057s; samplesPerSecond = 6056.5 +MPI Rank 1: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.1054s; samplesPerSecond = 6069.8 +MPI Rank 1: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.1055s; samplesPerSecond = 6067.7 +MPI Rank 1: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1059s; samplesPerSecond = 6046.1 +MPI Rank 1: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.1056s; samplesPerSecond = 6063.0 +MPI Rank 1: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.1059s; samplesPerSecond = 6045.7 +MPI Rank 1: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.1055s; samplesPerSecond = 6068.1 +MPI Rank 1: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.1052s; samplesPerSecond = 6081.2 +MPI Rank 1: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.1054s; samplesPerSecond = 6071.5 +MPI Rank 1: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.1058s; samplesPerSecond = 6046.4 +MPI Rank 1: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.1056s; samplesPerSecond = 6060.1 +MPI Rank 1: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.1055s; samplesPerSecond = 6068.7 +MPI Rank 1: 05/03/2016 18:00:09: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.38574s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:47: Starting Epoch 
2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 18:00:18: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:47: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 18:03:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08083820 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.8174s; samplesPerSecond = 3131.7 -MPI Rank 1: 05/03/2016 18:03:49: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05756240 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 1.0016s; samplesPerSecond = 2556.0 -MPI Rank 1: 05/03/2016 18:03:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.03869542 * 2560; EvalErrorPrediction = 0.55703125 * 2560; time = 0.9558s; samplesPerSecond = 2678.4 -MPI Rank 1: 05/03/2016 18:03:51: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01921890 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.9592s; samplesPerSecond = 2669.0 -MPI Rank 1: 05/03/2016 18:03:52: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.98871438 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 1.1051s; samplesPerSecond = 2316.6 -MPI Rank 1: 05/03/2016 18:03:53: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06763953 * 2560; EvalErrorPrediction = 0.56093750 * 2560; time = 0.9900s; samplesPerSecond = 2585.8 -MPI Rank 1: 05/03/2016 18:03:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02343111 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.7567s; samplesPerSecond = 3383.0 -MPI Rank 1: 05/03/2016 18:03:55: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.10504153 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.8908s; samplesPerSecond = 2873.8 -MPI Rank 1: 05/03/2016 18:03:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04764268 * 20480; EvalErrorPrediction = 0.56328125 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.50512s +MPI Rank 1: 05/03/2016 18:00:18: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 18:00:18: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08083820 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.6793s; samplesPerSecond = 3768.6 +MPI Rank 1: 05/03/2016 18:00:19: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05756240 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.8311s; samplesPerSecond = 3080.1 +MPI Rank 1: 05/03/2016 18:00:20: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.03869542 * 2560; EvalErrorPrediction = 0.55703125 * 2560; time = 0.9620s; samplesPerSecond = 2661.2 +MPI Rank 1: 05/03/2016 18:00:21: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01921890 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.9841s; samplesPerSecond = 2601.4 +MPI Rank 1: 05/03/2016 18:00:22: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.98871438 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 0.6995s; samplesPerSecond = 3659.7 +MPI Rank 1: 05/03/2016 18:00:23: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06763953 * 2560; EvalErrorPrediction = 0.56093750 * 2560; time = 0.7666s; samplesPerSecond = 3339.4 +MPI Rank 1: 05/03/2016 18:00:23: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02343111 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.9408s; samplesPerSecond = 2721.0 +MPI Rank 1: 05/03/2016 18:00:24: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.10504153 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.8968s; samplesPerSecond = 2854.5 +MPI Rank 1: 05/03/2016 18:00:24: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04764268 * 20480; EvalErrorPrediction = 0.56328125 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.80472s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 18:00:25: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:55: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 18:03:57: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95696442 * 10240; EvalErrorPrediction = 0.53564453 * 10240; time = 2.1441s; samplesPerSecond = 4775.8 -MPI Rank 1: 05/03/2016 18:03:59: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93975925 * 10240; EvalErrorPrediction = 0.53330078 * 10240; time = 2.1809s; samplesPerSecond = 4695.3 -MPI Rank 1: 05/03/2016 18:03:59: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.94836183 * 20480; EvalErrorPrediction = 0.53447266 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.36247s -MPI Rank 1: 05/03/2016 18:03:59: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 18:00:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 18:00:27: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95696442 * 10240; EvalErrorPrediction = 0.53564453 * 10240; time = 2.3100s; samplesPerSecond = 4432.8 +MPI Rank 1: 05/03/2016 18:00:29: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93975925 * 10240; EvalErrorPrediction = 0.53330078 * 10240; time = 2.1260s; samplesPerSecond = 4816.5 +MPI Rank 1: 05/03/2016 18:00:29: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.94836183 * 20480; EvalErrorPrediction = 0.53447266 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.46945s +MPI Rank 1: 05/03/2016 18:00:29: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:59: Action "train" complete. +MPI Rank 1: 05/03/2016 18:00:29: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:03:59: __COMPLETED__ -MPI Rank 2: 05/03/2016 18:03:28: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 18:03:28: Build info: +MPI Rank 1: 05/03/2016 18:00:29: __COMPLETED__ +MPI Rank 2: 05/03/2016 18:00:04: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:00:04: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: Built time: May 3 2016 17:56:15 -MPI Rank 2: 05/03/2016 18:03:28: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 2: 05/03/2016 18:03:28: Build type: release -MPI Rank 2: 05/03/2016 18:03:28: Build target: GPU -MPI Rank 2: 05/03/2016 18:03:28: With 1bit-SGD: yes -MPI Rank 2: 05/03/2016 18:03:28: Math lib: acml -MPI Rank 2: 05/03/2016 18:03:28: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 2: 05/03/2016 18:03:28: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 2: 05/03/2016 18:03:28: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 2: 05/03/2016 18:03:28: Build Branch: HEAD -MPI Rank 2: 05/03/2016 18:03:28: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 2: 05/03/2016 18:03:28: Built by philly on 87698aadbc9d -MPI Rank 2: 05/03/2016 18:03:28: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 2: 05/03/2016 18:03:28: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:00:04: Built time: May 3 2016 17:56:15 +MPI Rank 2: 05/03/2016 18:00:04: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 2: 05/03/2016 18:00:04: Build type: release +MPI Rank 2: 05/03/2016 18:00:04: Build target: GPU +MPI Rank 2: 05/03/2016 18:00:04: With 1bit-SGD: yes +MPI Rank 2: 05/03/2016 18:00:04: Math lib: acml +MPI Rank 2: 05/03/2016 18:00:04: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 2: 05/03/2016 18:00:04: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 2: 05/03/2016 18:00:04: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 2: 05/03/2016 18:00:04: Build Branch: HEAD +MPI Rank 2: 05/03/2016 18:00:04: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 2: 05/03/2016 18:00:04: Built by philly on 87698aadbc9d +MPI Rank 2: 05/03/2016 18:00:04: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 2: 05/03/2016 18:00:04: ------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: Running on localhost at 2016/05/03 18:03:28 -MPI Rank 2: 05/03/2016 18:03:28: Command line: -MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk 
configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 2: 05/03/2016 18:00:04: Running on localhost at 2016/05/03 18:00:04 +MPI Rank 2: 05/03/2016 18:00:04: Command line: +MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:03:28: precision = "float" +MPI Rank 2: 05/03/2016 18:00:04: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:00:04: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1281,30 +1281,28 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:00:04: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:03:28: precision = "float" +MPI Rank 2: 05/03/2016 18:00:04: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:00:04: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = -1 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1388,36 +1386,34 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:00:04: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:00:04: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN +MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. MPI Rank 2: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:deviceId=-1 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1499,35 +1495,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 2: ] 
[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 18:03:28: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 18:03:28: Commands: speechTrain -MPI Rank 2: 05/03/2016 18:03:28: Precision = "double" -MPI Rank 2: 05/03/2016 18:03:28: Using 8 CPU threads. -MPI Rank 2: 05/03/2016 18:03:28: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 18:03:28: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 2: 05/03/2016 18:03:28: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 2: 05/03/2016 18:00:04: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:00:04: Commands: speechTrain +MPI Rank 2: 05/03/2016 18:00:04: Precision = "double" +MPI Rank 2: 05/03/2016 18:00:04: Using 8 CPU threads. +MPI Rank 2: 05/03/2016 18:00:04: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 18:00:04: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 2: 05/03/2016 18:00:04: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: ############################################################################## -MPI Rank 2: 05/03/2016 18:03:28: # # -MPI Rank 2: 05/03/2016 18:03:28: # Action "train" # -MPI Rank 2: 05/03/2016 18:03:28: # # -MPI Rank 2: 05/03/2016 18:03:28: ############################################################################## +MPI Rank 2: 05/03/2016 18:00:04: ############################################################################## +MPI Rank 2: 05/03/2016 18:00:04: # # +MPI Rank 2: 05/03/2016 18:00:04: # Action "train" # +MPI Rank 2: 05/03/2016 18:00:04: # # +MPI Rank 2: 05/03/2016 18:00:04: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 18:00:04: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 2: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... 
total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: Creating virgin network. +MPI Rank 2: 05/03/2016 18:00:04: Creating virgin network. MPI Rank 2: MPI Rank 2: Post-processing network... MPI Rank 2: @@ -1579,14 +1574,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: Created model with 25 nodes on CPU. +MPI Rank 2: 05/03/2016 18:00:04: Created model with 25 nodes on CPU. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: Training criterion node(s): -MPI Rank 2: 05/03/2016 18:03:28: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 18:00:04: Training criterion node(s): +MPI Rank 2: 05/03/2016 18:00:04: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 18:00:04: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 18:00:04: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1594,103 +1589,108 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0x2a23b58: {[W1 Value[512 x 512]] } -MPI Rank 2: 0x2a4f258: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 0x2ac5398: {[W0 Value[512 x 363]] } -MPI Rank 2: 0x2ad2118: {[features Value[363 x *]] } -MPI Rank 2: 0x2ad2708: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 0x2ae2048: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 0x2ae2208: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 0x2ae23c8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 0x2af91f8: {[labels Value[132 x *]] } -MPI Rank 2: 0x2afd8b8: {[B0 Value[512 x 1]] } -MPI Rank 2: 0x2b08c48: {[B1 Value[512 x 1]] } -MPI Rank 2: 0x2b0ace8: {[W2 Value[132 x 512]] } -MPI Rank 2: 0x2b219c8: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 0x2b25328: {[LogOfPrior Value[132]] } -MPI Rank 2: 0x2b26228: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 0x2b2a688: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 0x2b2a848: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 0x2b2aa08: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 0x2b2ff38: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 0x2b473a8: {[W0*features Value[512 x *]] } -MPI Rank 2: 0x2b475b8: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 0x2b488c8: 
{[MeanOfFeatures Value[363]] } -MPI Rank 2: 0x2b4a198: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 0x2b4cae8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 0x2b4cca8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 0x2b4ce68: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 0x2b51778: {[Prior Value[132]] } -MPI Rank 2: 0x2b536d8: {[B2 Value[132 x 1]] } +MPI Rank 2: 0x3023c58: {[W0 Value[512 x 363]] } +MPI Rank 2: 0x3024048: {[features Value[363 x *]] } +MPI Rank 2: 0x30242a8: {[B0 Value[512 x 1]] } +MPI Rank 2: 0x3025da8: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 0x30278b8: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 0x3027e78: {[Prior Value[132]] } +MPI Rank 2: 0x3074088: {[B2 Value[132 x 1]] } +MPI Rank 2: 0x3074188: {[labels Value[132 x *]] } +MPI Rank 2: 0x30a7888: {[B1 Value[512 x 1]] } +MPI Rank 2: 0x30d4558: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 0x30d4718: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 0x30d48d8: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 0x30d4a98: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 0x30dd7b8: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 0x30fb138: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 0x30fe328: {[W2 Value[132 x 512]] } +MPI Rank 2: 0x3104958: {[LogOfPrior Value[132]] } +MPI Rank 2: 0x3121348: {[W0*features Value[512 x *]] } +MPI Rank 2: 0x3124d88: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 0x3124f48: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 0x3125108: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 0x31252c8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 0x3125488: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 0x3125648: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 0x313bb68: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 0x313bd28: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 0x3148d48: {[W1 Value[512 x 512]] } +MPI Rank 2: 0x314e848: {[EvalErrorPrediction Value[1]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 18:00:04: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:28: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 18:03:28: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 18:03:28: Prior = Mean() +MPI Rank 2: 05/03/2016 18:00:04: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 18:00:04: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 18:00:04: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:35: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 18:00:06: Precomputing --> Completed. 
MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:35: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 18:00:06: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:35: Starting minibatch loop. -MPI Rank 2: 05/03/2016 18:03:35: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.5948s; samplesPerSecond = 1075.9 -MPI Rank 2: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.3250s; samplesPerSecond = 1969.2 -MPI Rank 2: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.3233s; samplesPerSecond = 1979.4 -MPI Rank 2: 05/03/2016 18:03:36: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.3660s; samplesPerSecond = 1748.5 -MPI Rank 2: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.3689s; samplesPerSecond = 1735.1 -MPI Rank 2: 05/03/2016 18:03:37: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3395s; samplesPerSecond = 1885.3 -MPI Rank 2: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.5258s; samplesPerSecond = 1217.2 -MPI Rank 2: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.3612s; samplesPerSecond = 1772.0 -MPI Rank 2: 05/03/2016 18:03:38: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3464s; samplesPerSecond = 1847.5 -MPI Rank 2: 05/03/2016 18:03:39: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3551s; samplesPerSecond = 1802.4 -MPI Rank 2: 05/03/2016 18:03:39: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.3632s; samplesPerSecond = 1762.1 -MPI Rank 2: 05/03/2016 18:03:39: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3584s; samplesPerSecond = 1785.7 -MPI Rank 2: 05/03/2016 18:03:40: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.5265s; samplesPerSecond = 1215.5 -MPI Rank 2: 05/03/2016 18:03:40: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.3400s; samplesPerSecond = 1882.6 -MPI Rank 2: 05/03/2016 18:03:41: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; 
time = 0.3401s; samplesPerSecond = 1881.9 -MPI Rank 2: 05/03/2016 18:03:41: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.3399s; samplesPerSecond = 1882.8 -MPI Rank 2: 05/03/2016 18:03:41: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.3625s; samplesPerSecond = 1765.7 -MPI Rank 2: 05/03/2016 18:03:42: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3576s; samplesPerSecond = 1789.5 -MPI Rank 2: 05/03/2016 18:03:42: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.5395s; samplesPerSecond = 1186.3 -MPI Rank 2: 05/03/2016 18:03:43: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.3456s; samplesPerSecond = 1851.9 -MPI Rank 2: 05/03/2016 18:03:43: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.3277s; samplesPerSecond = 1952.8 -MPI Rank 2: 05/03/2016 18:03:43: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3705s; samplesPerSecond = 1727.4 -MPI Rank 2: 05/03/2016 18:03:44: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.3475s; samplesPerSecond = 1841.6 -MPI Rank 2: 05/03/2016 18:03:44: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.5519s; samplesPerSecond = 1159.6 -MPI Rank 2: 05/03/2016 18:03:45: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.3678s; samplesPerSecond = 1740.1 -MPI Rank 2: 05/03/2016 18:03:45: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.3556s; samplesPerSecond = 1799.9 -MPI Rank 2: 05/03/2016 18:03:45: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.3500s; samplesPerSecond = 1828.4 -MPI Rank 2: 05/03/2016 18:03:46: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.3469s; samplesPerSecond = 1844.7 -MPI Rank 2: 05/03/2016 18:03:46: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3230s; samplesPerSecond = 1981.6 -MPI Rank 2: 05/03/2016 18:03:46: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.5458s; samplesPerSecond = 1172.6 -MPI Rank 2: 05/03/2016 18:03:47: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.3699s; samplesPerSecond = 1730.1 -MPI Rank 2: 05/03/2016 18:03:47: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.3493s; samplesPerSecond = 1832.4 -MPI Rank 2: 05/03/2016 
18:03:47: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=12.4563s +MPI Rank 2: 05/03/2016 18:00:06: Starting minibatch loop. +MPI Rank 2: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.1161s; samplesPerSecond = 5514.1 +MPI Rank 2: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.1601s; samplesPerSecond = 3996.5 +MPI Rank 2: 05/03/2016 18:00:06: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.3369s; samplesPerSecond = 1899.6 +MPI Rank 2: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.5526s; samplesPerSecond = 1158.2 +MPI Rank 2: 05/03/2016 18:00:07: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.3619s; samplesPerSecond = 1768.2 +MPI Rank 2: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3564s; samplesPerSecond = 1795.6 +MPI Rank 2: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.3292s; samplesPerSecond = 1944.0 +MPI Rank 2: 05/03/2016 18:00:08: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.3558s; samplesPerSecond = 1798.7 +MPI Rank 2: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3259s; samplesPerSecond = 1963.7 +MPI Rank 2: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.5317s; samplesPerSecond = 1203.8 +MPI Rank 2: 05/03/2016 18:00:09: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.3594s; samplesPerSecond = 1780.6 +MPI Rank 2: 05/03/2016 18:00:10: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3503s; samplesPerSecond = 1826.9 +MPI Rank 2: 05/03/2016 18:00:10: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.3746s; samplesPerSecond = 1708.4 +MPI Rank 2: 05/03/2016 18:00:11: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.3655s; samplesPerSecond = 1751.0 +MPI Rank 2: 05/03/2016 18:00:11: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.5256s; samplesPerSecond = 1217.7 +MPI Rank 2: 05/03/2016 18:00:11: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.3300s; samplesPerSecond = 1939.2 +MPI Rank 2: 05/03/2016 
18:00:12: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.3337s; samplesPerSecond = 1918.1 +MPI Rank 2: 05/03/2016 18:00:12: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3330s; samplesPerSecond = 1922.1 +MPI Rank 2: 05/03/2016 18:00:12: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.3177s; samplesPerSecond = 2014.4 +MPI Rank 2: 05/03/2016 18:00:13: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.3163s; samplesPerSecond = 2023.2 +MPI Rank 2: 05/03/2016 18:00:13: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.5587s; samplesPerSecond = 1145.6 +MPI Rank 2: 05/03/2016 18:00:14: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.2812s; samplesPerSecond = 2276.1 +MPI Rank 2: 05/03/2016 18:00:14: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.3182s; samplesPerSecond = 2011.6 +MPI Rank 2: 05/03/2016 18:00:14: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.3118s; samplesPerSecond = 2052.9 +MPI Rank 2: 05/03/2016 18:00:15: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.3288s; samplesPerSecond = 1946.3 +MPI Rank 2: 05/03/2016 18:00:15: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.3233s; samplesPerSecond = 1979.8 +MPI Rank 2: 05/03/2016 18:00:15: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.3488s; samplesPerSecond = 1834.9 +MPI Rank 2: 05/03/2016 18:00:16: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.5007s; samplesPerSecond = 1278.2 +MPI Rank 2: 05/03/2016 18:00:16: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3518s; samplesPerSecond = 1819.0 +MPI Rank 2: 05/03/2016 18:00:16: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3626s; samplesPerSecond = 1765.2 +MPI Rank 2: 05/03/2016 18:00:17: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.3647s; samplesPerSecond = 1754.9 +MPI Rank 2: 05/03/2016 18:00:17: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.3617s; samplesPerSecond = 1769.3 +MPI Rank 2: 05/03/2016 18:00:17: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=11.4523s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:47: Starting Epoch 
2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 18:00:18: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:47: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 18:03:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08083820 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.8132s; samplesPerSecond = 3148.0 -MPI Rank 2: 05/03/2016 18:03:49: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05756240 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 1.0012s; samplesPerSecond = 2557.0 -MPI Rank 2: 05/03/2016 18:03:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.03869542 * 2560; EvalErrorPrediction = 0.55703125 * 2560; time = 0.9565s; samplesPerSecond = 2676.3 -MPI Rank 2: 05/03/2016 18:03:51: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01921890 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.9622s; samplesPerSecond = 2660.5 -MPI Rank 2: 05/03/2016 18:03:52: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.98871438 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 1.1017s; samplesPerSecond = 2323.7 -MPI Rank 2: 05/03/2016 18:03:53: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06763953 * 2560; EvalErrorPrediction = 0.56093750 * 2560; time = 1.0021s; samplesPerSecond = 2554.6 -MPI Rank 2: 05/03/2016 18:03:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02343111 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.7503s; samplesPerSecond = 3412.2 -MPI Rank 2: 05/03/2016 18:03:55: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.10504153 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.8852s; samplesPerSecond = 2892.1 -MPI Rank 2: 05/03/2016 18:03:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04764268 * 20480; EvalErrorPrediction = 0.56328125 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.50157s +MPI Rank 2: 05/03/2016 18:00:18: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 18:00:18: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08083820 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.6837s; samplesPerSecond = 3744.3 +MPI Rank 2: 05/03/2016 18:00:19: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05756240 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.8431s; samplesPerSecond = 3036.4 +MPI Rank 2: 05/03/2016 18:00:20: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.03869542 * 2560; EvalErrorPrediction = 0.55703125 * 2560; time = 0.9531s; samplesPerSecond = 2686.1 +MPI Rank 2: 05/03/2016 18:00:21: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01921890 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.9766s; samplesPerSecond = 2621.3 +MPI Rank 2: 05/03/2016 18:00:22: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.98871438 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 0.7057s; samplesPerSecond = 3627.8 +MPI Rank 2: 05/03/2016 18:00:23: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06763953 * 2560; EvalErrorPrediction = 0.56093750 * 2560; time = 0.7612s; samplesPerSecond = 3363.0 +MPI Rank 2: 05/03/2016 18:00:23: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02343111 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.9522s; samplesPerSecond = 2688.6 +MPI Rank 2: 05/03/2016 18:00:24: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.10504153 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.9054s; samplesPerSecond = 2827.6 +MPI Rank 2: 05/03/2016 18:00:24: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04764268 * 20480; EvalErrorPrediction = 0.56328125 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.81519s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 18:00:25: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:55: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 18:03:57: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95696442 * 10240; EvalErrorPrediction = 0.53564453 * 10240; time = 2.1446s; samplesPerSecond = 4774.8 -MPI Rank 2: 05/03/2016 18:03:59: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93975925 * 10240; EvalErrorPrediction = 0.53330078 * 10240; time = 2.1921s; samplesPerSecond = 4671.3 -MPI Rank 2: 05/03/2016 18:03:59: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.94836183 * 20480; EvalErrorPrediction = 0.53447266 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.36319s -MPI Rank 2: 05/03/2016 18:03:59: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 18:00:25: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 18:00:27: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95696442 * 10240; EvalErrorPrediction = 0.53564453 * 10240; time = 2.3115s; samplesPerSecond = 4430.0 +MPI Rank 2: 05/03/2016 18:00:29: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93975925 * 10240; EvalErrorPrediction = 0.53330078 * 10240; time = 2.1253s; samplesPerSecond = 4818.3 +MPI Rank 2: 05/03/2016 18:00:29: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.94836183 * 20480; EvalErrorPrediction = 0.53447266 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.47496s +MPI Rank 2: 05/03/2016 18:00:29: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:59: Action "train" complete. +MPI Rank 2: 05/03/2016 18:00:29: Action "train" complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:03:59: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 18:00:29: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.gpu.txt index c83c796aa..f8a34c70b 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.gpu.txt @@ -1,4 +1,4 @@ -=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr ------------------------------------------------------------------- Build info: @@ -60,31 +60,31 @@ ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other +ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other +ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 2 in a gearbox of 3 +mpihelper: we are cog 1 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 0 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 1 in a gearbox of 3 +mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded ping [mpihelper]: all 3 nodes responded -05/03/2016 18:04:00: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank0 -05/03/2016 18:04:00: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank1 -05/03/2016 18:04:01: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank2 +05/03/2016 18:00:29: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank0 +05/03/2016 18:00:30: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank1 +05/03/2016 18:00:30: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank2 -------------------------------------------------------------------------- 
-mpiexec has exited due to process rank 0 with PID 18115 on +mpiexec has exited due to process rank 0 with PID 3239 on node 87698aadbc9d exiting improperly. There are three reasons this could occur: 1. this process did not call "init" before exiting, but others in @@ -107,32 +107,32 @@ terminated by signals sent by mpiexec (as reported here). You can avoid this message by specifying -quiet on the mpiexec command line. -------------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:04:00: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:04:00: Build info: +MPI Rank 0: 05/03/2016 18:00:29: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:00:29: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: Built time: May 3 2016 17:56:15 -MPI Rank 0: 05/03/2016 18:04:00: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 0: 05/03/2016 18:04:00: Build type: release -MPI Rank 0: 05/03/2016 18:04:00: Build target: GPU -MPI Rank 0: 05/03/2016 18:04:00: With 1bit-SGD: yes -MPI Rank 0: 05/03/2016 18:04:00: Math lib: acml -MPI Rank 0: 05/03/2016 18:04:00: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 0: 05/03/2016 18:04:00: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 0: 05/03/2016 18:04:00: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 0: 05/03/2016 18:04:00: Build Branch: HEAD -MPI Rank 0: 05/03/2016 18:04:00: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 0: 05/03/2016 18:04:00: Built by philly on 87698aadbc9d -MPI Rank 0: 05/03/2016 18:04:00: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 0: 05/03/2016 18:04:00: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:00:29: Built time: May 3 2016 17:56:15 +MPI Rank 0: 05/03/2016 18:00:29: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 0: 05/03/2016 18:00:29: Build type: release +MPI Rank 0: 05/03/2016 18:00:29: Build target: GPU +MPI Rank 0: 05/03/2016 18:00:29: With 1bit-SGD: yes +MPI Rank 0: 05/03/2016 18:00:29: Math lib: acml +MPI Rank 0: 05/03/2016 18:00:29: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 0: 05/03/2016 18:00:29: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 0: 05/03/2016 18:00:29: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 0: 05/03/2016 18:00:29: Build Branch: HEAD +MPI Rank 0: 05/03/2016 18:00:29: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 0: 05/03/2016 18:00:29: Built by philly on 87698aadbc9d +MPI Rank 0: 05/03/2016 18:00:29: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 0: 05/03/2016 18:00:29: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: Running on localhost at 2016/05/03 18:04:00 -MPI Rank 0: 05/03/2016 18:04:00: Command line: -MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data 
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 0: 05/03/2016 18:00:29: Running on localhost at 2016/05/03 18:00:29 +MPI Rank 0: 05/03/2016 18:00:29: Command line: +MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:04:00: precision = "float" +MPI Rank 0: 05/03/2016 18:00:29: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:00:29: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -222,30 +222,28 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:00:29: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:04:00: precision = "float" +MPI Rank 0: 05/03/2016 18:00:29: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:00:29: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = 0 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -329,36 +327,34 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:00:29: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:00:29: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN +MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. MPI Rank 0: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:deviceId=0 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -440,35 +436,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 0: ] 
[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 18:04:00: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 18:04:00: Commands: speechTrain -MPI Rank 0: 05/03/2016 18:04:00: Precision = "double" -MPI Rank 0: 05/03/2016 18:04:00: Using 8 CPU threads. -MPI Rank 0: 05/03/2016 18:04:00: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 18:04:00: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 0: 05/03/2016 18:04:00: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 0: 05/03/2016 18:00:29: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:00:29: Commands: speechTrain +MPI Rank 0: 05/03/2016 18:00:29: Precision = "double" +MPI Rank 0: 05/03/2016 18:00:29: Using 8 CPU threads. +MPI Rank 0: 05/03/2016 18:00:29: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 18:00:29: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 0: 05/03/2016 18:00:29: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: ############################################################################## -MPI Rank 0: 05/03/2016 18:04:00: # # -MPI Rank 0: 05/03/2016 18:04:00: # Action "train" # -MPI Rank 0: 05/03/2016 18:04:00: # # -MPI Rank 0: 05/03/2016 18:04:00: ############################################################################## +MPI Rank 0: 05/03/2016 18:00:29: ############################################################################## +MPI Rank 0: 05/03/2016 18:00:29: # # +MPI Rank 0: 05/03/2016 18:00:29: # Action "train" # +MPI Rank 0: 05/03/2016 18:00:29: # # +MPI Rank 0: 05/03/2016 18:00:29: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 18:00:29: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 0: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... 
total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: Creating virgin network. +MPI Rank 0: 05/03/2016 18:00:30: Creating virgin network. MPI Rank 0: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 0: MPI Rank 0: Post-processing network... @@ -521,14 +516,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: Created model with 25 nodes on GPU 0. +MPI Rank 0: 05/03/2016 18:00:30: Created model with 25 nodes on GPU 0. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: Training criterion node(s): -MPI Rank 0: 05/03/2016 18:04:00: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 18:00:30: Training criterion node(s): +MPI Rank 0: 05/03/2016 18:00:30: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 18:00:30: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 18:00:30: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. 
@@ -536,135 +531,140 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0x2d24148: {[features Value[363 x *]] } -MPI Rank 0: 0x361c138: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 0x361c648: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0x361d378: {[W0 Value[512 x 363]] } -MPI Rank 0: 0x3b146e8: {[W1 Value[512 x 512]] } -MPI Rank 0: 0x3b154b8: {[B1 Value[512 x 1]] } -MPI Rank 0: 0x3b16658: {[W2 Value[132 x 512]] } -MPI Rank 0: 0x3b17308: {[B2 Value[132 x 1]] } -MPI Rank 0: 0x3b18138: {[labels Value[132 x *]] } -MPI Rank 0: 0x3b19398: {[Prior Value[132]] } -MPI Rank 0: 0x3b1ec38: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0x3b1ef38: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0x3b1f0f8: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0x3b1f588: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0x3b1f6f8: {[LogOfPrior Value[132]] } -MPI Rank 0: 0x3b24d28: {[B0 Value[512 x 1]] } -MPI Rank 0: 0x3e003c8: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0x3e00b88: {[W0*features Value[512 x *]] } -MPI Rank 0: 0x3e00d98: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0x3e00ef8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0x3e01058: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 0x3e01218: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0x3e013d8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0x3e01598: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0x3e020f8: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0x3e022b8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0x3e02478: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0x3e02638: {[B2 Gradient[132 x 1]] } +MPI Rank 0: 0x14566d8: {[B0 Value[512 x 1]] } +MPI Rank 0: 0x1491748: {[features Value[363 x *]] } +MPI Rank 0: 0x215ed48: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 0x215f258: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 0x215ffc8: {[W0 Value[512 x 363]] } +MPI Rank 0: 0x217faf8: {[W1 Value[512 x 512]] } +MPI Rank 0: 0x25340d8: {[B1 Value[512 x 1]] } +MPI Rank 0: 0x2535258: {[W2 Value[132 x 512]] } +MPI Rank 0: 0x2535f08: {[B2 Value[132 x 1]] } +MPI Rank 0: 0x2536d38: {[labels Value[132 x *]] } +MPI Rank 0: 0x2537f98: {[Prior Value[132]] } +MPI Rank 0: 0x253d9c8: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 0x253db28: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 0x253dce8: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 0x253e178: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 0x253e2a8: {[LogOfPrior Value[132]] } +MPI Rank 0: 0x253fa08: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 0x25401c8: {[W0*features Value[512 x *]] } +MPI Rank 0: 0x25403d8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 0x2540538: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 0x25406f8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] 
} +MPI Rank 0: 0x25408b8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 0x2540a78: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 0x2540c38: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 0x2541798: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 0x2541958: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 0x2541b18: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 0x2541cd8: {[B2 Gradient[132 x 1]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 18:00:30: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:00: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 18:04:00: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 18:04:00: Prior = Mean() +MPI Rank 0: 05/03/2016 18:00:30: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 18:00:30: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 18:00:30: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:03: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 18:00:32: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:04: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 18:00:33: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:04: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.1930s; samplesPerSecond = 3315.5 -MPI Rank 0: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0950s; samplesPerSecond = 6737.5 -MPI Rank 0: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0949s; samplesPerSecond = 6741.4 -MPI Rank 0: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0949s; samplesPerSecond = 6741.6 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0949s; samplesPerSecond = 6742.4 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0950s; samplesPerSecond = 6734.4 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0951s; samplesPerSecond = 6726.8 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0950s; samplesPerSecond = 6735.4 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0950s; samplesPerSecond = 6739.9 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0950s; samplesPerSecond = 6736.7 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0949s; samplesPerSecond = 6742.5 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0950s; samplesPerSecond = 6738.5 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0951s; samplesPerSecond = 6731.5 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0950s; samplesPerSecond = 6738.6 -MPI Rank 0: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0949s; samplesPerSecond = 6743.2 -MPI Rank 0: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0948s; samplesPerSecond = 6748.3 -MPI Rank 0: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0950s; samplesPerSecond = 6737.3 -MPI Rank 0: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0950s; samplesPerSecond = 6733.7 -MPI Rank 0: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0951s; samplesPerSecond = 6726.3 -MPI Rank 0: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0940s; samplesPerSecond = 6811.0 -MPI Rank 0: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0955s; samplesPerSecond = 6699.7 -MPI Rank 0: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0956s; samplesPerSecond = 6697.9 -MPI Rank 0: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0956s; samplesPerSecond = 6696.4 -MPI Rank 0: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0955s; samplesPerSecond = 6699.8 -MPI Rank 0: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0954s; samplesPerSecond = 6705.7 -MPI Rank 0: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0954s; samplesPerSecond = 6707.8 -MPI Rank 0: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0954s; samplesPerSecond = 6705.2 -MPI Rank 0: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0956s; samplesPerSecond = 6696.4 -MPI Rank 0: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0954s; samplesPerSecond = 6708.9 -MPI Rank 0: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0954s; samplesPerSecond = 6708.8 -MPI Rank 0: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0954s; samplesPerSecond = 6707.2 -MPI Rank 0: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0724s; samplesPerSecond = 8833.9 -MPI Rank 0: 05/03/2016 18:04:07: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.16669s -MPI Rank 0: 05/03/2016 18:04:07: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 18:00:33: Starting minibatch loop. 
+MPI Rank 0: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.0957s; samplesPerSecond = 6686.9 +MPI Rank 0: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0957s; samplesPerSecond = 6684.9 +MPI Rank 0: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6685.2 +MPI Rank 0: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0957s; samplesPerSecond = 6684.9 +MPI Rank 0: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0958s; samplesPerSecond = 6682.7 +MPI Rank 0: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0956s; samplesPerSecond = 6692.7 +MPI Rank 0: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0956s; samplesPerSecond = 6692.3 +MPI Rank 0: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0956s; samplesPerSecond = 6691.8 +MPI Rank 0: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0957s; samplesPerSecond = 6688.8 +MPI Rank 0: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0957s; samplesPerSecond = 6685.1 +MPI Rank 0: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0957s; samplesPerSecond = 6689.5 +MPI Rank 0: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0958s; samplesPerSecond = 6680.5 +MPI Rank 0: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0958s; samplesPerSecond = 6679.3 +MPI Rank 0: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0958s; samplesPerSecond = 6683.3 +MPI Rank 0: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0958s; samplesPerSecond = 6682.7 +MPI Rank 0: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0958s; samplesPerSecond = 6678.4 +MPI Rank 0: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0958s; samplesPerSecond = 6677.7 +MPI Rank 0: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0958s; samplesPerSecond = 6677.2 +MPI Rank 0: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0960s; samplesPerSecond = 6664.8 +MPI Rank 0: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0959s; samplesPerSecond = 6670.3 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0958s; samplesPerSecond = 6681.1 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0957s; samplesPerSecond = 6685.0 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0957s; samplesPerSecond = 6685.7 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0958s; samplesPerSecond = 6682.7 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0958s; samplesPerSecond = 6678.7 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0958s; samplesPerSecond = 6682.8 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0957s; samplesPerSecond = 6688.1 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0958s; samplesPerSecond = 6682.4 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0958s; samplesPerSecond = 6683.5 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0957s; samplesPerSecond = 6685.8 +MPI Rank 0: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0958s; samplesPerSecond = 6682.6 +MPI Rank 0: 05/03/2016 18:00:37: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0958s; samplesPerSecond = 6682.8 +MPI Rank 0: 05/03/2016 18:00:37: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.068s +MPI Rank 0: 05/03/2016 18:00:37: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:07: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time 
constant = 607.5 samples +MPI Rank 0: 05/03/2016 18:00:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:07: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 0: 05/03/2016 18:04:07: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.07462499 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1537s; samplesPerSecond = 16652.8 -MPI Rank 0: 05/03/2016 18:04:07: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05320994 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.1388s; samplesPerSecond = 18438.5 -MPI Rank 0: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02998212 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1390s; samplesPerSecond = 18422.3 -MPI Rank 0: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01198559 * 2560; EvalErrorPrediction = 0.56640625 * 2560; time = 0.1384s; samplesPerSecond = 18501.5 -MPI Rank 0: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97539682 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.1384s; samplesPerSecond = 18501.3 -MPI Rank 0: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07754441 * 2560; EvalErrorPrediction = 0.56210938 * 2560; time = 0.1386s; samplesPerSecond = 18469.4 -MPI Rank 0: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.00580381 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1382s; samplesPerSecond = 18522.8 -MPI Rank 0: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.13146949 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.1389s; samplesPerSecond = 18424.7 -MPI Rank 0: 05/03/2016 18:04:08: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04500215 * 20480; EvalErrorPrediction = 0.56108398 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.12985s -MPI Rank 0: 05/03/2016 18:04:08: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn.2' +MPI Rank 0: 05/03/2016 18:00:37: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
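The "effective momentum" and "momentum as time constant" figures logged above are two views of the same SGD setting: if the per-minibatch momentum is written as mu = exp(-m/T) for a minibatch of m samples, the time constant T reproduces the logged values. A short check, assuming minibatch sizes of 64 samples in epoch 1 and 256 in epoch 2 (inferred from the "* 640" and "* 2560" sample counts per 10-minibatch group above; these sizes are not stated explicitly in the log):

    T = -\frac{m}{\ln \mu}, \qquad
    T_{\text{epoch 1}} = -\frac{64}{\ln 0.900000} \approx 607.4 \text{ samples}, \qquad
    T_{\text{epoch 2}} = -\frac{256}{\ln 0.656119} \approx 607.5 \text{ samples}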
+MPI Rank 0: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.07462499 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1499s; samplesPerSecond = 17080.0 +MPI Rank 0: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05320994 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.1391s; samplesPerSecond = 18398.6 +MPI Rank 0: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02998212 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1396s; samplesPerSecond = 18340.5 +MPI Rank 0: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01198559 * 2560; EvalErrorPrediction = 0.56640625 * 2560; time = 0.1384s; samplesPerSecond = 18491.6 +MPI Rank 0: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97539682 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.1385s; samplesPerSecond = 18483.8 +MPI Rank 0: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07754441 * 2560; EvalErrorPrediction = 0.56210938 * 2560; time = 0.1382s; samplesPerSecond = 18523.2 +MPI Rank 0: 05/03/2016 18:00:38: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.00580381 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1381s; samplesPerSecond = 18531.4 +MPI Rank 0: 05/03/2016 18:00:38: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.13146949 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.1388s; samplesPerSecond = 18446.1 +MPI Rank 0: 05/03/2016 18:00:38: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04500215 * 20480; EvalErrorPrediction = 0.56108398 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.12658s +MPI Rank 0: 05/03/2016 18:00:38: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:08: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 18:00:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:08: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 18:04:09: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97563233 * 10240; EvalErrorPrediction = 0.54248047 * 10240; time = 0.2812s; samplesPerSecond = 36415.1 -MPI Rank 0: 05/03/2016 18:04:09: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94366837 * 10240; EvalErrorPrediction = 0.53730469 * 10240; time = 0.2793s; samplesPerSecond = 36662.0 -MPI Rank 0: 05/03/2016 18:04:09: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95965035 * 20480; EvalErrorPrediction = 0.53989258 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.566938s -MPI Rank 0: 05/03/2016 18:04:09: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 18:04:09: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 18:00:38: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 18:00:38: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97563233 * 10240; EvalErrorPrediction = 0.54248047 * 10240; time = 0.2825s; samplesPerSecond = 36253.7 +MPI Rank 0: 05/03/2016 18:00:38: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94366837 * 10240; EvalErrorPrediction = 0.53730469 * 10240; time = 0.2813s; samplesPerSecond = 36397.6 +MPI Rank 0: 05/03/2016 18:00:38: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95965035 * 20480; EvalErrorPrediction = 0.53989258 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.570758s +MPI Rank 0: 05/03/2016 18:00:38: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 18:00:38: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: Action "train" complete. +MPI Rank 0: 05/03/2016 18:00:38: Action "train" complete. 
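The value printed on each "Finished Epoch" line appears to be the sample-weighted average of the per-minibatch-group criteria reported above it; the epoch-3 CrossEntropyWithSoftmax figure in this log can be reproduced directly from the two 10240-sample groups:

    \frac{1.97563233 \cdot 10240 + 1.94366837 \cdot 10240}{20480} = 1.95965035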
MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: __COMPLETED__ -MPI Rank 1: 05/03/2016 18:04:00: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 18:04:00: Build info: +MPI Rank 0: 05/03/2016 18:00:38: __COMPLETED__ +MPI Rank 1: 05/03/2016 18:00:30: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:00:30: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:00: Built time: May 3 2016 17:56:15 -MPI Rank 1: 05/03/2016 18:04:00: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 1: 05/03/2016 18:04:00: Build type: release -MPI Rank 1: 05/03/2016 18:04:00: Build target: GPU -MPI Rank 1: 05/03/2016 18:04:00: With 1bit-SGD: yes -MPI Rank 1: 05/03/2016 18:04:00: Math lib: acml -MPI Rank 1: 05/03/2016 18:04:00: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 1: 05/03/2016 18:04:00: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 1: 05/03/2016 18:04:00: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 1: 05/03/2016 18:04:00: Build Branch: HEAD -MPI Rank 1: 05/03/2016 18:04:00: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 1: 05/03/2016 18:04:00: Built by philly on 87698aadbc9d -MPI Rank 1: 05/03/2016 18:04:00: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 1: 05/03/2016 18:04:00: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:00:30: Built time: May 3 2016 17:56:15 +MPI Rank 1: 05/03/2016 18:00:30: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 1: 05/03/2016 18:00:30: Build type: release +MPI Rank 1: 05/03/2016 18:00:30: Build target: GPU +MPI Rank 1: 05/03/2016 18:00:30: With 1bit-SGD: yes +MPI Rank 1: 05/03/2016 18:00:30: Math lib: acml +MPI Rank 1: 05/03/2016 18:00:30: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 1: 05/03/2016 18:00:30: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 1: 05/03/2016 18:00:30: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 1: 05/03/2016 18:00:30: Build Branch: HEAD +MPI Rank 1: 05/03/2016 18:00:30: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 1: 05/03/2016 18:00:30: Built by philly on 87698aadbc9d +MPI Rank 1: 05/03/2016 18:00:30: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 1: 05/03/2016 18:00:30: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:00: Running on localhost at 2016/05/03 18:04:00 -MPI Rank 1: 05/03/2016 18:04:00: Command line: -MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] 
speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 1: 05/03/2016 18:00:30: Running on localhost at 2016/05/03 18:00:30 +MPI Rank 1: 05/03/2016 18:00:30: Command line: +MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:00: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:04:00: precision = "float" +MPI Rank 1: 05/03/2016 18:00:30: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:00:30: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -754,30 +754,28 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:00: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:00:30: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:00: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:04:00: precision = "float" +MPI Rank 1: 05/03/2016 18:00:30: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:00:30: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = 0 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -861,36 +859,34 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:00: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:00:30: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:00: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:00:30: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN +MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. MPI Rank 1: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:deviceId=0 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -972,35 +968,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 1: ] 
[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 18:04:00: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 18:04:00: Commands: speechTrain -MPI Rank 1: 05/03/2016 18:04:00: Precision = "double" -MPI Rank 1: 05/03/2016 18:04:00: Using 8 CPU threads. -MPI Rank 1: 05/03/2016 18:04:00: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 18:04:00: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 1: 05/03/2016 18:04:00: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 1: 05/03/2016 18:00:30: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:00:30: Commands: speechTrain +MPI Rank 1: 05/03/2016 18:00:30: Precision = "double" +MPI Rank 1: 05/03/2016 18:00:30: Using 8 CPU threads. +MPI Rank 1: 05/03/2016 18:00:30: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 18:00:30: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 1: 05/03/2016 18:00:30: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:00: ############################################################################## -MPI Rank 1: 05/03/2016 18:04:00: # # -MPI Rank 1: 05/03/2016 18:04:00: # Action "train" # -MPI Rank 1: 05/03/2016 18:04:00: # # -MPI Rank 1: 05/03/2016 18:04:00: ############################################################################## +MPI Rank 1: 05/03/2016 18:00:30: ############################################################################## +MPI Rank 1: 05/03/2016 18:00:30: # # +MPI Rank 1: 05/03/2016 18:00:30: # Action "train" # +MPI Rank 1: 05/03/2016 18:00:30: # # +MPI Rank 1: 05/03/2016 18:00:30: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:00: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 18:00:30: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 1: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... 
total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:00: Creating virgin network. +MPI Rank 1: 05/03/2016 18:00:30: Creating virgin network. MPI Rank 1: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 1: MPI Rank 1: Post-processing network... @@ -1053,14 +1048,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:01: Created model with 25 nodes on GPU 0. +MPI Rank 1: 05/03/2016 18:00:30: Created model with 25 nodes on GPU 0. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:01: Training criterion node(s): -MPI Rank 1: 05/03/2016 18:04:01: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 18:00:30: Training criterion node(s): +MPI Rank 1: 05/03/2016 18:00:30: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:01: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 18:00:30: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:01: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 18:00:30: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. 
@@ -1068,132 +1063,137 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 0x2a5b768: {[features Value[363 x *]] } -MPI Rank 1: 0x2f5a1f8: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 0x2f5a6c8: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 0x2f5b3f8: {[W0 Value[512 x 363]] } -MPI Rank 1: 0x37e0c58: {[B0 Value[512 x 1]] } -MPI Rank 1: 0x37e2de8: {[W1 Value[512 x 512]] } -MPI Rank 1: 0x3b97398: {[B1 Value[512 x 1]] } -MPI Rank 1: 0x3b98518: {[W2 Value[132 x 512]] } -MPI Rank 1: 0x3b991c8: {[B2 Value[132 x 1]] } -MPI Rank 1: 0x3b99ff8: {[labels Value[132 x *]] } -MPI Rank 1: 0x3b9b258: {[Prior Value[132]] } -MPI Rank 1: 0x3ba0c88: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 0x3ba0de8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 0x3ba0fa8: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 0x3ba1438: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 0x3ba1568: {[LogOfPrior Value[132]] } -MPI Rank 1: 0x3ba2cc8: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 0x3ba3488: {[W0*features Value[512 x *]] } -MPI Rank 1: 0x3ba3698: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 0x3ba37f8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 0x3ba39b8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 0x3ba3b78: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 0x3ba3d38: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 0x3ba3ef8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 0x3ba4a58: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 0x3ba4c18: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 0x3ba4dd8: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 0x3ba4f98: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 0x2649218: {[features Value[363 x *]] } +MPI Rank 1: 0x2fbec88: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 0x2fbf198: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 0x2fbfe58: {[W0 Value[512 x 363]] } +MPI Rank 1: 0x34e8bd8: {[W1 Value[512 x 512]] } +MPI Rank 1: 0x34e99a8: {[B1 Value[512 x 1]] } +MPI Rank 1: 0x34eab48: {[W2 Value[132 x 512]] } +MPI Rank 1: 0x34eb7f8: {[B2 Value[132 x 1]] } +MPI Rank 1: 0x34ec628: {[labels Value[132 x *]] } +MPI Rank 1: 0x34ed888: {[Prior Value[132]] } +MPI Rank 1: 0x34f3128: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 0x34f3428: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 0x34f35e8: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 0x34f3a78: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 0x34f3be8: {[LogOfPrior Value[132]] } +MPI Rank 1: 0x34f91e8: {[B0 Value[512 x 1]] } +MPI Rank 1: 0x37d48c8: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 0x37d5088: {[W0*features Value[512 x *]] } +MPI Rank 1: 0x37d5298: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 0x37d53f8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 0x37d5558: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x 
*]] } +MPI Rank 1: 0x37d5718: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 0x37d58d8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 0x37d5a98: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 0x37d65f8: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 0x37d67b8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 0x37d6978: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 0x37d6b38: {[B2 Gradient[132 x 1]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:01: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 18:00:30: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:01: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 18:04:01: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 18:04:01: Prior = Mean() +MPI Rank 1: 05/03/2016 18:00:30: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 18:00:30: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 18:00:30: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:04: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 18:00:33: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:04: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 18:00:33: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:04: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.1257s; samplesPerSecond = 5089.5 -MPI Rank 1: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0951s; samplesPerSecond = 6730.6 -MPI Rank 1: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0950s; samplesPerSecond = 6739.9 -MPI Rank 1: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0950s; samplesPerSecond = 6737.1 -MPI Rank 1: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0949s; samplesPerSecond = 6743.8 -MPI Rank 1: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0949s; samplesPerSecond = 6743.1 -MPI Rank 1: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0951s; samplesPerSecond = 6731.6 -MPI Rank 1: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0951s; samplesPerSecond = 6729.4 -MPI Rank 1: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0950s; samplesPerSecond = 6734.5 -MPI Rank 1: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0950s; samplesPerSecond = 6740.3 -MPI Rank 1: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0950s; samplesPerSecond = 6736.7 -MPI Rank 1: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0949s; samplesPerSecond = 6741.7 -MPI Rank 1: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0950s; samplesPerSecond = 6734.4 -MPI Rank 1: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0951s; samplesPerSecond = 6732.8 -MPI Rank 1: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0950s; samplesPerSecond = 6738.1 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0948s; samplesPerSecond = 6748.4 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0949s; samplesPerSecond = 6747.4 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0950s; samplesPerSecond = 6734.6 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0951s; samplesPerSecond = 6729.4 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0952s; samplesPerSecond = 6725.9 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0972s; samplesPerSecond = 6584.7 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0955s; samplesPerSecond = 6698.6 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0955s; samplesPerSecond = 6698.5 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0956s; samplesPerSecond = 6697.3 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0955s; samplesPerSecond = 6700.9 -MPI Rank 1: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0954s; samplesPerSecond = 6708.5 -MPI Rank 1: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0954s; samplesPerSecond = 6705.9 -MPI Rank 1: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0955s; samplesPerSecond = 6703.3 -MPI Rank 1: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0956s; samplesPerSecond = 6697.4 -MPI Rank 1: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0954s; samplesPerSecond = 6709.7 -MPI Rank 1: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0954s; samplesPerSecond = 6708.8 -MPI Rank 1: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0954s; samplesPerSecond = 6708.9 -MPI Rank 1: 05/03/2016 18:04:07: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.12095s +MPI Rank 1: 05/03/2016 18:00:33: Starting minibatch loop. 
+MPI Rank 1: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.0973s; samplesPerSecond = 6576.6 +MPI Rank 1: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0957s; samplesPerSecond = 6685.5 +MPI Rank 1: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6684.9 +MPI Rank 1: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0957s; samplesPerSecond = 6684.1 +MPI Rank 1: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0958s; samplesPerSecond = 6682.8 +MPI Rank 1: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0956s; samplesPerSecond = 6692.0 +MPI Rank 1: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0956s; samplesPerSecond = 6692.2 +MPI Rank 1: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0956s; samplesPerSecond = 6691.1 +MPI Rank 1: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0957s; samplesPerSecond = 6688.2 +MPI Rank 1: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0957s; samplesPerSecond = 6684.6 +MPI Rank 1: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0957s; samplesPerSecond = 6690.0 +MPI Rank 1: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0958s; samplesPerSecond = 6681.4 +MPI Rank 1: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0958s; samplesPerSecond = 6678.5 +MPI Rank 1: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0958s; samplesPerSecond = 6682.9 +MPI Rank 1: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0958s; samplesPerSecond = 6682.6 +MPI Rank 1: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0959s; samplesPerSecond = 6677.0 +MPI Rank 1: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0958s; samplesPerSecond = 6677.2 +MPI Rank 1: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0958s; samplesPerSecond = 6679.3 +MPI Rank 1: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0960s; samplesPerSecond = 6664.2 +MPI Rank 1: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0960s; samplesPerSecond = 6669.6 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0958s; samplesPerSecond = 6681.1 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0957s; samplesPerSecond = 6685.5 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0957s; samplesPerSecond = 6685.0 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0958s; samplesPerSecond = 6681.8 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0958s; samplesPerSecond = 6679.1 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0958s; samplesPerSecond = 6682.7 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0957s; samplesPerSecond = 6687.4 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0958s; samplesPerSecond = 6683.1 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0958s; samplesPerSecond = 6682.3 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0958s; samplesPerSecond = 6683.2 +MPI Rank 1: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0958s; samplesPerSecond = 6682.5 +MPI Rank 1: 05/03/2016 18:00:37: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0958s; samplesPerSecond = 6683.4 +MPI Rank 1: 05/03/2016 18:00:37: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.06969s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:07: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 18:00:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:07: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 18:04:07: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.07462499 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1535s; samplesPerSecond = 16680.5 -MPI Rank 1: 05/03/2016 18:04:07: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05320994 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.1391s; samplesPerSecond = 18401.1 -MPI Rank 1: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02998212 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1390s; samplesPerSecond = 18416.5 -MPI Rank 1: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01198559 * 2560; EvalErrorPrediction = 0.56640625 * 2560; time = 0.1380s; samplesPerSecond = 18548.8 -MPI Rank 1: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97539682 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.1384s; samplesPerSecond = 18503.0 -MPI Rank 1: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07754441 * 2560; EvalErrorPrediction = 0.56210938 * 2560; time = 0.1390s; samplesPerSecond = 18423.5 -MPI Rank 1: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.00580381 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1382s; samplesPerSecond = 18518.9 -MPI Rank 1: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.13146949 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.1389s; samplesPerSecond = 18425.4 -MPI Rank 1: 05/03/2016 18:04:08: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04500215 * 20480; EvalErrorPrediction = 0.56108398 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.12959s +MPI Rank 1: 05/03/2016 18:00:37: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.07462499 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1499s; samplesPerSecond = 17079.1 +MPI Rank 1: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05320994 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.1395s; samplesPerSecond = 18352.3 +MPI Rank 1: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02998212 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1392s; samplesPerSecond = 18385.0 +MPI Rank 1: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01198559 * 2560; EvalErrorPrediction = 0.56640625 * 2560; time = 0.1384s; samplesPerSecond = 18492.7 +MPI Rank 1: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97539682 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.1385s; samplesPerSecond = 18481.8 +MPI Rank 1: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07754441 * 2560; EvalErrorPrediction = 0.56210938 * 2560; time = 0.1382s; samplesPerSecond = 18523.5 +MPI Rank 1: 05/03/2016 18:00:38: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.00580381 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1382s; samplesPerSecond = 18530.4 +MPI Rank 1: 05/03/2016 18:00:38: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.13146949 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.1391s; samplesPerSecond = 18404.0 +MPI Rank 1: 05/03/2016 18:00:38: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04500215 * 20480; EvalErrorPrediction = 0.56108398 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.12635s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:08: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 18:00:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:08: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 18:04:09: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97563233 * 10240; EvalErrorPrediction = 0.54248047 * 10240; time = 0.2814s; samplesPerSecond = 36389.4 -MPI Rank 1: 05/03/2016 18:04:09: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94366837 * 10240; EvalErrorPrediction = 0.53730469 * 10240; time = 0.2793s; samplesPerSecond = 36665.8 -MPI Rank 1: 05/03/2016 18:04:09: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95965035 * 20480; EvalErrorPrediction = 0.53989258 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.566713s -MPI Rank 1: 05/03/2016 18:04:09: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 18:00:38: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 18:00:38: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97563233 * 10240; EvalErrorPrediction = 0.54248047 * 10240; time = 0.2829s; samplesPerSecond = 36192.2 +MPI Rank 1: 05/03/2016 18:00:38: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94366837 * 10240; EvalErrorPrediction = 0.53730469 * 10240; time = 0.2813s; samplesPerSecond = 36400.7 +MPI Rank 1: 05/03/2016 18:00:38: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95965035 * 20480; EvalErrorPrediction = 0.53989258 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.57053s +MPI Rank 1: 05/03/2016 18:00:38: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:09: Action "train" complete. +MPI Rank 1: 05/03/2016 18:00:38: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:09: __COMPLETED__ -MPI Rank 2: 05/03/2016 18:04:01: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 18:04:01: Build info: +MPI Rank 1: 05/03/2016 18:00:38: __COMPLETED__ +MPI Rank 2: 05/03/2016 18:00:30: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:00:30: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: Built time: May 3 2016 17:56:15 -MPI Rank 2: 05/03/2016 18:04:01: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 2: 05/03/2016 18:04:01: Build type: release -MPI Rank 2: 05/03/2016 18:04:01: Build target: GPU -MPI Rank 2: 05/03/2016 18:04:01: With 1bit-SGD: yes -MPI Rank 2: 05/03/2016 18:04:01: Math lib: acml -MPI Rank 2: 05/03/2016 18:04:01: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 2: 05/03/2016 18:04:01: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 2: 05/03/2016 18:04:01: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 2: 05/03/2016 18:04:01: Build Branch: HEAD -MPI Rank 2: 05/03/2016 18:04:01: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 2: 05/03/2016 18:04:01: Built by philly on 87698aadbc9d -MPI Rank 2: 05/03/2016 18:04:01: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 2: 05/03/2016 18:04:01: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:00:30: Built time: May 3 2016 17:56:15 +MPI Rank 2: 05/03/2016 18:00:30: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 2: 05/03/2016 18:00:30: Build type: release +MPI Rank 2: 05/03/2016 18:00:30: Build target: GPU +MPI Rank 2: 05/03/2016 18:00:30: With 1bit-SGD: yes +MPI Rank 2: 05/03/2016 18:00:30: Math lib: acml +MPI Rank 2: 05/03/2016 18:00:30: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 2: 05/03/2016 18:00:30: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 2: 05/03/2016 18:00:30: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 2: 05/03/2016 18:00:30: Build Branch: HEAD +MPI Rank 2: 05/03/2016 18:00:30: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 2: 05/03/2016 18:00:30: Built by philly on 87698aadbc9d +MPI Rank 2: 05/03/2016 18:00:30: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 2: 05/03/2016 18:00:30: ------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: Running on localhost at 2016/05/03 18:04:01 -MPI Rank 2: 05/03/2016 18:04:01: Command line: -MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk 
configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 2: 05/03/2016 18:00:30: Running on localhost at 2016/05/03 18:00:30 +MPI Rank 2: 05/03/2016 18:00:30: Command line: +MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:04:01: precision = "float" +MPI Rank 2: 05/03/2016 18:00:30: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:00:30: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1283,30 +1283,28 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:00:30: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:04:01: precision = "float" +MPI Rank 2: 05/03/2016 18:00:30: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:00:30: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = 0 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1390,36 +1388,34 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:00:30: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:00:30: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/../../../DNN +MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/.. MPI Rank 2: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:deviceId=0 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1501,35 +1497,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 2: ] 
[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 18:04:01: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 18:04:01: Commands: speechTrain -MPI Rank 2: 05/03/2016 18:04:01: Precision = "double" -MPI Rank 2: 05/03/2016 18:04:01: Using 8 CPU threads. -MPI Rank 2: 05/03/2016 18:04:01: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 18:04:01: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 2: 05/03/2016 18:04:01: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 2: 05/03/2016 18:00:30: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:00:30: Commands: speechTrain +MPI Rank 2: 05/03/2016 18:00:30: Precision = "double" +MPI Rank 2: 05/03/2016 18:00:30: Using 8 CPU threads. +MPI Rank 2: 05/03/2016 18:00:30: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 18:00:30: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 2: 05/03/2016 18:00:30: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: ############################################################################## -MPI Rank 2: 05/03/2016 18:04:01: # # -MPI Rank 2: 05/03/2016 18:04:01: # Action "train" # -MPI Rank 2: 05/03/2016 18:04:01: # # -MPI Rank 2: 05/03/2016 18:04:01: ############################################################################## +MPI Rank 2: 05/03/2016 18:00:30: ############################################################################## +MPI Rank 2: 05/03/2016 18:00:30: # # +MPI Rank 2: 05/03/2016 18:00:30: # Action "train" # +MPI Rank 2: 05/03/2016 18:00:30: # # +MPI Rank 2: 05/03/2016 18:00:30: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 18:00:30: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 2: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... 
total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: Creating virgin network. +MPI Rank 2: 05/03/2016 18:00:31: Creating virgin network. MPI Rank 2: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 2: MPI Rank 2: Post-processing network... @@ -1582,14 +1577,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: Created model with 25 nodes on GPU 0. +MPI Rank 2: 05/03/2016 18:00:31: Created model with 25 nodes on GPU 0. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: Training criterion node(s): -MPI Rank 2: 05/03/2016 18:04:01: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 18:00:31: Training criterion node(s): +MPI Rank 2: 05/03/2016 18:00:31: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 18:00:31: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 18:00:31: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. 
@@ -1597,103 +1592,108 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0x2cda188: {[features Value[363 x *]] } -MPI Rank 2: 0x7febcbee7b18: {[LogOfPrior Value[132]] } -MPI Rank 2: 0x7febcbee8d08: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 0x7febcbee9408: {[W0*features Value[512 x *]] } -MPI Rank 2: 0x7febcbee9938: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 0x7febcbee9a98: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 0x7febcbee9c58: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 0x7febcbee9e18: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 0x7febcbee9fd8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 0x7febcbeea198: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 0x7febcbeea358: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 0x7febcbeeaeb8: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 0x7febcbeeb078: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 0x7febcbeeb238: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 0x7febcbeeb3f8: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 0x7febcc2004e8: {[B2 Value[132 x 1]] } -MPI Rank 2: 0x7febcc2012a8: {[labels Value[132 x *]] } -MPI Rank 2: 0x7febcc202508: {[Prior Value[132]] } -MPI Rank 2: 0x7febcc207da8: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 0x7febcc2080a8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 0x7febcc208268: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 0x7febcc209e18: {[B0 Value[512 x 1]] } -MPI Rank 2: 0x7febcc20c1c8: {[W1 Value[512 x 512]] } -MPI Rank 2: 0x7febcc20cf98: {[B1 Value[512 x 1]] } -MPI Rank 2: 0x7febcc20e138: {[W2 Value[132 x 512]] } -MPI Rank 2: 0x7febcc503258: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 0x7febcc5036c8: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 0x7febcc5043f8: {[W0 Value[512 x 363]] } +MPI Rank 2: 0x303e3e8: {[features Value[363 x *]] } +MPI Rank 2: 0x3483318: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 0x3483b78: {[W0 Value[512 x 363]] } +MPI Rank 2: 0x34858f8: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 0x3ee19d8: {[W1 Value[512 x 512]] } +MPI Rank 2: 0x3ee27a8: {[B1 Value[512 x 1]] } +MPI Rank 2: 0x3ee3948: {[W2 Value[132 x 512]] } +MPI Rank 2: 0x3ee45f8: {[B2 Value[132 x 1]] } +MPI Rank 2: 0x3ee5428: {[labels Value[132 x *]] } +MPI Rank 2: 0x3ee6688: {[Prior Value[132]] } +MPI Rank 2: 0x3eebf28: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 0x3eec228: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 0x3eec3e8: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 0x3eec878: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 0x3eec9e8: {[LogOfPrior Value[132]] } +MPI Rank 2: 0x3ef1fe8: {[B0 Value[512 x 1]] } +MPI Rank 2: 0x41cd6b8: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 0x41cde78: {[W0*features Value[512 x *]] } +MPI Rank 2: 0x41ce088: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 0x41ce1e8: 
{[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 0x41ce348: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 0x41ce508: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 0x41ce6c8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 0x41ce888: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 0x41cf3e8: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 0x41cf5a8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 0x41cf768: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 0x41cf928: {[B2 Gradient[132 x 1]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 18:00:31: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:01: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 18:04:01: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 18:04:01: Prior = Mean() +MPI Rank 2: 05/03/2016 18:00:31: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 18:00:31: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 18:00:31: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:04: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 18:00:33: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:04: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 18:00:33: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:04: Starting minibatch loop. 
-MPI Rank 2: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.1985s; samplesPerSecond = 3223.8 -MPI Rank 2: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0950s; samplesPerSecond = 6738.5 -MPI Rank 2: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0950s; samplesPerSecond = 6738.9 -MPI Rank 2: 05/03/2016 18:04:04: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0949s; samplesPerSecond = 6742.0 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0949s; samplesPerSecond = 6741.8 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0950s; samplesPerSecond = 6733.8 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0951s; samplesPerSecond = 6728.2 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0950s; samplesPerSecond = 6734.9 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0950s; samplesPerSecond = 6740.1 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0950s; samplesPerSecond = 6736.3 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0949s; samplesPerSecond = 6741.5 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0950s; samplesPerSecond = 6734.0 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0951s; samplesPerSecond = 6733.3 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0950s; samplesPerSecond = 6738.3 -MPI Rank 2: 05/03/2016 18:04:05: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0949s; samplesPerSecond = 6744.6 -MPI Rank 2: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0948s; samplesPerSecond = 6747.6 -MPI Rank 2: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0950s; samplesPerSecond = 6736.6 -MPI Rank 2: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0950s; samplesPerSecond = 6734.4 -MPI Rank 2: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0952s; samplesPerSecond = 6725.5 -MPI Rank 2: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0957s; samplesPerSecond = 6687.1 -MPI Rank 2: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0955s; samplesPerSecond = 6701.4 -MPI Rank 2: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0956s; samplesPerSecond = 6696.2 -MPI Rank 2: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0956s; samplesPerSecond = 6697.9 -MPI Rank 2: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0955s; samplesPerSecond = 6699.7 -MPI Rank 2: 05/03/2016 18:04:06: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0955s; samplesPerSecond = 6704.9 -MPI Rank 2: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0954s; samplesPerSecond = 6707.2 -MPI Rank 2: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0955s; samplesPerSecond = 6702.1 -MPI Rank 2: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0955s; samplesPerSecond = 6698.3 -MPI Rank 2: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0954s; samplesPerSecond = 6709.0 -MPI Rank 2: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0954s; samplesPerSecond = 6707.6 -MPI Rank 2: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0954s; samplesPerSecond = 6708.2 -MPI Rank 2: 05/03/2016 18:04:07: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0678s; samplesPerSecond = 9443.3 -MPI Rank 2: 05/03/2016 18:04:07: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.16874s +MPI Rank 2: 05/03/2016 18:00:33: Starting minibatch loop. 
+MPI Rank 2: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.0969s; samplesPerSecond = 6603.9 +MPI Rank 2: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0958s; samplesPerSecond = 6683.1 +MPI Rank 2: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6685.0 +MPI Rank 2: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0957s; samplesPerSecond = 6686.2 +MPI Rank 2: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0958s; samplesPerSecond = 6682.6 +MPI Rank 2: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0956s; samplesPerSecond = 6692.5 +MPI Rank 2: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0956s; samplesPerSecond = 6692.3 +MPI Rank 2: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0956s; samplesPerSecond = 6691.8 +MPI Rank 2: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0957s; samplesPerSecond = 6688.7 +MPI Rank 2: 05/03/2016 18:00:34: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0957s; samplesPerSecond = 6684.4 +MPI Rank 2: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0957s; samplesPerSecond = 6689.3 +MPI Rank 2: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0958s; samplesPerSecond = 6681.1 +MPI Rank 2: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0958s; samplesPerSecond = 6679.2 +MPI Rank 2: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0958s; samplesPerSecond = 6682.2 +MPI Rank 2: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0958s; samplesPerSecond = 6682.7 +MPI Rank 2: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0958s; samplesPerSecond = 6678.1 +MPI Rank 2: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0958s; samplesPerSecond = 6677.6 +MPI Rank 2: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0958s; samplesPerSecond = 6677.5 +MPI Rank 2: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0960s; samplesPerSecond = 6664.7 +MPI Rank 2: 05/03/2016 18:00:35: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0959s; samplesPerSecond = 6670.3 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0958s; samplesPerSecond = 6680.9 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0957s; samplesPerSecond = 6684.9 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0957s; samplesPerSecond = 6685.0 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0958s; samplesPerSecond = 6683.2 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0958s; samplesPerSecond = 6678.4 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0958s; samplesPerSecond = 6682.5 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0957s; samplesPerSecond = 6688.2 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0958s; samplesPerSecond = 6682.2 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0958s; samplesPerSecond = 6682.8 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0958s; samplesPerSecond = 6684.1 +MPI Rank 2: 05/03/2016 18:00:36: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0958s; samplesPerSecond = 6681.8 +MPI Rank 2: 05/03/2016 18:00:37: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0958s; samplesPerSecond = 6682.7 +MPI Rank 2: 05/03/2016 18:00:37: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.06908s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:07: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 18:00:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:07: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 18:04:07: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.07462499 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1537s; samplesPerSecond = 16661.2 -MPI Rank 2: 05/03/2016 18:04:07: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05320994 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.1388s; samplesPerSecond = 18449.3 -MPI Rank 2: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02998212 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1393s; samplesPerSecond = 18376.5 -MPI Rank 2: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01198559 * 2560; EvalErrorPrediction = 0.56640625 * 2560; time = 0.1381s; samplesPerSecond = 18543.7 -MPI Rank 2: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97539682 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.1384s; samplesPerSecond = 18501.8 -MPI Rank 2: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07754441 * 2560; EvalErrorPrediction = 0.56210938 * 2560; time = 0.1386s; samplesPerSecond = 18473.0 -MPI Rank 2: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.00580381 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1382s; samplesPerSecond = 18523.7 -MPI Rank 2: 05/03/2016 18:04:08: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.13146949 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.1390s; samplesPerSecond = 18423.0 -MPI Rank 2: 05/03/2016 18:04:08: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04500215 * 20480; EvalErrorPrediction = 0.56108398 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.12978s +MPI Rank 2: 05/03/2016 18:00:37: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.07462499 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1500s; samplesPerSecond = 17071.3 +MPI Rank 2: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.05320994 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.1391s; samplesPerSecond = 18407.6 +MPI Rank 2: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02998212 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1396s; samplesPerSecond = 18333.1 +MPI Rank 2: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.01198559 * 2560; EvalErrorPrediction = 0.56640625 * 2560; time = 0.1384s; samplesPerSecond = 18491.0 +MPI Rank 2: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97539682 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.1385s; samplesPerSecond = 18485.6 +MPI Rank 2: 05/03/2016 18:00:37: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07754441 * 2560; EvalErrorPrediction = 0.56210938 * 2560; time = 0.1382s; samplesPerSecond = 18523.3 +MPI Rank 2: 05/03/2016 18:00:38: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.00580381 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.1382s; samplesPerSecond = 18529.9 +MPI Rank 2: 05/03/2016 18:00:38: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.13146949 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.1387s; samplesPerSecond = 18452.8 +MPI Rank 2: 05/03/2016 18:00:38: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.04500215 * 20480; EvalErrorPrediction = 0.56108398 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.12651s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:08: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 18:00:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:08: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 18:04:09: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97563233 * 10240; EvalErrorPrediction = 0.54248047 * 10240; time = 0.2815s; samplesPerSecond = 36376.9 -MPI Rank 2: 05/03/2016 18:04:09: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94366837 * 10240; EvalErrorPrediction = 0.53730469 * 10240; time = 0.2790s; samplesPerSecond = 36708.7 -MPI Rank 2: 05/03/2016 18:04:09: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95965035 * 20480; EvalErrorPrediction = 0.53989258 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.566862s -MPI Rank 2: 05/03/2016 18:04:09: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 18:00:38: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 18:00:38: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97563233 * 10240; EvalErrorPrediction = 0.54248047 * 10240; time = 0.2834s; samplesPerSecond = 36126.4 +MPI Rank 2: 05/03/2016 18:00:38: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94366837 * 10240; EvalErrorPrediction = 0.53730469 * 10240; time = 0.2810s; samplesPerSecond = 36442.2 +MPI Rank 2: 05/03/2016 18:00:38: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95965035 * 20480; EvalErrorPrediction = 0.53989258 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.570665s +MPI Rank 2: 05/03/2016 18:00:38: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:09: Action "train" complete. +MPI Rank 2: 05/03/2016 18:00:38: Action "train" complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:09: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 18:00:38: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt index a8b41be13..5128ca1db 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt @@ -1,4 +1,4 @@ -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=2 precision=double 
speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr ------------------------------------------------------------------- Build info: @@ -57,14 +57,14 @@ ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded +ping [requestnodes (before change)]: all 3 nodes responded +ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: 3 nodes pinging each other +ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded ping [requestnodes (after change)]: all 3 nodes responded ping [requestnodes (after change)]: all 3 nodes responded @@ -83,45 +83,40 @@ job aborted: [0] process exited without calling finalize -[1] process exited without calling finalize - -[2] terminated +[1-2] terminated ---- error analysis ----- [0] on CNTK-MUC01 -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe ended prematurely and may have crashed. exit code -1 - -[1] on CNTK-MUC01 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe ended prematurely and may have crashed. 
exit code 0 ---- error analysis ----- -MPI Rank 0: 05/03/2016 14:50:06: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank0 -MPI Rank 0: 05/03/2016 14:50:06: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 14:50:06: Build info: +MPI Rank 0: 05/03/2016 13:22:28: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank0 +MPI Rank 0: 05/03/2016 13:22:28: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 13:22:28: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:06: Built time: May 3 2016 13:15:46 -MPI Rank 0: 05/03/2016 14:50:06: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 0: 05/03/2016 14:50:06: Build type: Release -MPI Rank 0: 05/03/2016 14:50:06: Build target: GPU -MPI Rank 0: 05/03/2016 14:50:06: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 14:50:06: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 05/03/2016 14:50:06: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 0: 05/03/2016 14:50:06: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 05/03/2016 14:50:06: Build Branch: HEAD -MPI Rank 0: 05/03/2016 14:50:06: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 0: 05/03/2016 14:50:06: Built by svcphil on cntk-muc01 -MPI Rank 0: 05/03/2016 14:50:06: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 05/03/2016 14:50:06: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 13:22:28: Built time: May 3 2016 13:15:46 +MPI Rank 0: 05/03/2016 13:22:28: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 0: 05/03/2016 13:22:28: Build type: Release +MPI Rank 0: 05/03/2016 13:22:28: Build target: GPU +MPI Rank 0: 05/03/2016 13:22:28: With 1bit-SGD: no +MPI Rank 0: 05/03/2016 13:22:28: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 0: 05/03/2016 13:22:28: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 0: 05/03/2016 13:22:28: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 0: 05/03/2016 13:22:28: Build Branch: HEAD +MPI Rank 0: 05/03/2016 13:22:28: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 0: 05/03/2016 13:22:28: Built by svcphil on cntk-muc01 +MPI Rank 0: 05/03/2016 13:22:28: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 0: 05/03/2016 13:22:28: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:06: Running on cntk-muc01 at 2016/05/03 14:50:06 -MPI Rank 0: 05/03/2016 14:50:06: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 
timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 0: 05/03/2016 13:22:28: Running on cntk-muc01 at 2016/05/03 13:22:28 +MPI Rank 0: 05/03/2016 13:22:28: Command line: +MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:06: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 14:50:06: precision = "float" +MPI Rank 0: 05/03/2016 13:22:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 13:22:28: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -211,30 +206,28 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI 
Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:06: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 13:22:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:06: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 14:50:06: precision = "float" +MPI Rank 0: 05/03/2016 13:22:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 13:22:28: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = -1 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -318,36 +311,34 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:06: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 13:22:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:06: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 13:22:28: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain MPI Rank 0: 
configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:deviceId=-1 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -429,35 +420,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 14:50:06: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 14:50:06: Commands: speechTrain -MPI Rank 0: 05/03/2016 14:50:06: Precision = "double" -MPI Rank 0: 05/03/2016 14:50:06: Using 2 CPU threads. 
-MPI Rank 0: 05/03/2016 14:50:06: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 14:50:06: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 0: 05/03/2016 14:50:06: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 0: 05/03/2016 13:22:28: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 13:22:28: Commands: speechTrain +MPI Rank 0: 05/03/2016 13:22:28: Precision = "double" +MPI Rank 0: 05/03/2016 13:22:28: Using 2 CPU threads. +MPI Rank 0: 05/03/2016 13:22:28: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 13:22:28: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 0: 05/03/2016 13:22:28: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:06: ############################################################################## -MPI Rank 0: 05/03/2016 14:50:06: # # -MPI Rank 0: 05/03/2016 14:50:06: # Action "train" # -MPI Rank 0: 05/03/2016 14:50:06: # # -MPI Rank 0: 05/03/2016 14:50:06: ############################################################################## +MPI Rank 0: 05/03/2016 13:22:28: ############################################################################## +MPI Rank 0: 05/03/2016 13:22:28: # # +MPI Rank 0: 05/03/2016 13:22:28: # Action "train" # +MPI Rank 0: 05/03/2016 13:22:28: # # +MPI Rank 0: 05/03/2016 13:22:28: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:06: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 13:22:28: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:07: Creating virgin network. +MPI Rank 0: 05/03/2016 13:22:29: Creating virgin network. MPI Rank 0: MPI Rank 0: Post-processing network... MPI Rank 0: @@ -509,14 +499,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:07: Created model with 25 nodes on CPU. 
+MPI Rank 0: 05/03/2016 13:22:29: Created model with 25 nodes on CPU. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:07: Training criterion node(s): -MPI Rank 0: 05/03/2016 14:50:07: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 13:22:29: Training criterion node(s): +MPI Rank 0: 05/03/2016 13:22:29: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:07: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 13:22:29: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:07: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 13:22:29: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -524,135 +514,140 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0000005AB664CAF0: {[B0 Value[512 x 1]] } -MPI Rank 0: 0000005AB664CD70: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0000005AB664CFF0: {[W0 Value[512 x 363]] } -MPI Rank 0: 0000005AB664D130: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 0000005AB664D9F0: {[features Value[363 x *]] } -MPI Rank 0: 0000005AB667ABE0: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 0000005AB6702970: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0000005AB6702A10: {[W0*features Value[512 x *]] } -MPI Rank 0: 0000005AB6702BF0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0000005AB6702D30: {[B2 Value[132 x 1]] } -MPI Rank 0: 0000005AB6702DD0: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0000005AB6702F10: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0000005AB67030F0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0000005AB6703230: {[W2 Value[132 x 512]] } -MPI Rank 0: 0000005AB67032D0: {[labels Value[132 x *]] } -MPI Rank 0: 0000005AB67034B0: {[W1 Value[512 x 512]] } -MPI Rank 0: 0000005AB67037D0: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0000005AB67039B0: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0000005AB6703AF0: {[LogOfPrior Value[132]] } -MPI Rank 0: 0000005AB6703C30: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0000005AB6703EB0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0000005AB6704090: {[Prior Value[132]] } -MPI Rank 0: 0000005AB6704270: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0000005AB67043B0: {[B1 Value[512 x 1]] } -MPI Rank 0: 0000005AB6704450: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0000005AB67044F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0000005AB6704590: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0000005AB6704630: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 0000009B4E3324F0: {[features Value[363 x *]] } +MPI Rank 0: 0000009B4E332950: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 0000009B4E332B30: {[W0 Value[512 x 363]] } +MPI Rank 0: 0000009B4E332D10: {[B0 Value[512 x 1]] } +MPI Rank 0: 0000009B4E332EF0: 
{[InvStdOfFeatures Value[363]] } +MPI Rank 0: 0000009B4E348B30: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 0000009B4E348C70: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 0000009B4E348D10: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 0000009B4E348F90: {[B1 Value[512 x 1]] } +MPI Rank 0: 0000009B4E3490D0: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 0000009B4E349170: {[LogOfPrior Value[132]] } +MPI Rank 0: 0000009B4E349350: {[W0*features Value[512 x *]] } +MPI Rank 0: 0000009B4E3493F0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 0000009B4E3495D0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 0000009B4E349710: {[Prior Value[132]] } +MPI Rank 0: 0000009B4E3497B0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 0000009B4E3498F0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 0000009B4E349A30: {[labels Value[132 x *]] } +MPI Rank 0: 0000009B4E349DF0: {[W1 Value[512 x 512]] } +MPI Rank 0: 0000009B4E349E90: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 0000009B4E34A1B0: {[W2 Value[132 x 512]] } +MPI Rank 0: 0000009B4E34A390: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 0000009B4E34A430: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 0000009B4E34A570: {[B2 Value[132 x 1]] } +MPI Rank 0: 0000009B4E34A930: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 0000009B4E34A9D0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 0000009B572DBE00: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 0000009B572DC620: {[B2 Gradient[132 x 1]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:07: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 13:22:29: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:07: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 14:50:07: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 14:50:07: Prior = Mean() +MPI Rank 0: 05/03/2016 13:22:29: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 13:22:29: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 13:22:29: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:08: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 13:22:31: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:09: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 13:22:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:09: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 14:50:10: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.9824s; samplesPerSecond = 651.5 -MPI Rank 0: 05/03/2016 14:50:11: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.7148s; samplesPerSecond = 895.3 -MPI Rank 0: 05/03/2016 14:50:12: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.7748s; samplesPerSecond = 826.0 -MPI Rank 0: 05/03/2016 14:50:13: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.7873s; samplesPerSecond = 812.9 -MPI Rank 0: 05/03/2016 14:50:14: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.8068s; samplesPerSecond = 793.2 -MPI Rank 0: 05/03/2016 14:50:14: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.8292s; samplesPerSecond = 771.8 -MPI Rank 0: 05/03/2016 14:50:15: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.7961s; samplesPerSecond = 803.9 -MPI Rank 0: 05/03/2016 14:50:16: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.8043s; samplesPerSecond = 795.7 -MPI Rank 0: 05/03/2016 14:50:17: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.6897s; samplesPerSecond = 927.9 -MPI Rank 0: 05/03/2016 14:50:17: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.7721s; samplesPerSecond = 828.9 -MPI Rank 0: 05/03/2016 14:50:18: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.6875s; samplesPerSecond = 930.9 -MPI Rank 0: 05/03/2016 14:50:19: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.7527s; samplesPerSecond = 850.3 -MPI Rank 0: 05/03/2016 14:50:20: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.7934s; samplesPerSecond = 806.6 -MPI Rank 0: 05/03/2016 14:50:21: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.8440s; samplesPerSecond = 758.3 -MPI Rank 0: 05/03/2016 14:50:21: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.8155s; samplesPerSecond = 784.8 -MPI Rank 0: 05/03/2016 14:50:22: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.8443s; samplesPerSecond = 758.0 -MPI Rank 0: 05/03/2016 14:50:23: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.7954s; samplesPerSecond = 804.6 -MPI Rank 0: 05/03/2016 14:50:24: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.8222s; samplesPerSecond = 778.4 -MPI Rank 0: 05/03/2016 14:50:25: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.8426s; samplesPerSecond = 759.6 -MPI Rank 0: 05/03/2016 14:50:25: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.7299s; samplesPerSecond = 876.8 -MPI Rank 0: 05/03/2016 14:50:26: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.8006s; samplesPerSecond = 799.4 -MPI Rank 0: 05/03/2016 14:50:27: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7684s; samplesPerSecond = 832.9 -MPI Rank 0: 05/03/2016 14:50:28: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.7756s; samplesPerSecond = 825.2 -MPI Rank 0: 05/03/2016 14:50:28: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.7699s; samplesPerSecond = 831.2 -MPI Rank 0: 05/03/2016 14:50:29: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.7466s; samplesPerSecond = 857.2 -MPI Rank 0: 05/03/2016 14:50:30: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6998s; samplesPerSecond = 914.6 -MPI Rank 0: 05/03/2016 14:50:31: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.6903s; samplesPerSecond = 927.2 -MPI Rank 0: 05/03/2016 14:50:31: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.5875s; samplesPerSecond = 1089.4 -MPI Rank 0: 05/03/2016 14:50:32: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.2992s; samplesPerSecond = 2138.9 -MPI Rank 0: 05/03/2016 14:50:32: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.2313s; samplesPerSecond = 2767.6 -MPI Rank 0: 05/03/2016 14:50:32: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.2348s; samplesPerSecond = 2725.9 -MPI Rank 0: 05/03/2016 14:50:32: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.2356s; samplesPerSecond = 2716.4 -MPI Rank 0: 05/03/2016 14:50:32: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=22.8437s -MPI Rank 0: 05/03/2016 14:50:32: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 13:22:32: Starting minibatch loop. 
+MPI Rank 0: 05/03/2016 13:22:32: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.7082s; samplesPerSecond = 903.7 +MPI Rank 0: 05/03/2016 13:22:33: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.6928s; samplesPerSecond = 923.8 +MPI Rank 0: 05/03/2016 13:22:34: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.7033s; samplesPerSecond = 909.9 +MPI Rank 0: 05/03/2016 13:22:34: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6976s; samplesPerSecond = 917.5 +MPI Rank 0: 05/03/2016 13:22:35: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.7471s; samplesPerSecond = 856.6 +MPI Rank 0: 05/03/2016 13:22:36: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.7178s; samplesPerSecond = 891.7 +MPI Rank 0: 05/03/2016 13:22:37: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.7061s; samplesPerSecond = 906.3 +MPI Rank 0: 05/03/2016 13:22:37: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.7352s; samplesPerSecond = 870.5 +MPI Rank 0: 05/03/2016 13:22:38: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.7215s; samplesPerSecond = 887.1 +MPI Rank 0: 05/03/2016 13:22:39: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.7110s; samplesPerSecond = 900.1 +MPI Rank 0: 05/03/2016 13:22:40: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.7261s; samplesPerSecond = 881.4 +MPI Rank 0: 05/03/2016 13:22:40: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.7112s; samplesPerSecond = 899.8 +MPI Rank 0: 05/03/2016 13:22:41: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.6829s; samplesPerSecond = 937.1 +MPI Rank 0: 05/03/2016 13:22:42: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.7137s; samplesPerSecond = 896.7 +MPI Rank 0: 05/03/2016 13:22:42: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.7010s; samplesPerSecond = 913.0 +MPI Rank 0: 05/03/2016 13:22:43: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.7419s; samplesPerSecond = 862.6 +MPI Rank 0: 05/03/2016 13:22:44: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.7021s; samplesPerSecond = 911.6 +MPI Rank 0: 05/03/2016 13:22:45: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.7418s; samplesPerSecond = 862.8 +MPI Rank 0: 05/03/2016 13:22:45: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7054s; samplesPerSecond = 907.3 +MPI Rank 0: 05/03/2016 13:22:46: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.7458s; samplesPerSecond = 858.1 +MPI Rank 0: 05/03/2016 13:22:47: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.6937s; samplesPerSecond = 922.5 +MPI Rank 0: 05/03/2016 13:22:47: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7131s; samplesPerSecond = 897.5 +MPI Rank 0: 05/03/2016 13:22:48: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.7007s; samplesPerSecond = 913.4 +MPI Rank 0: 05/03/2016 13:22:49: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.7138s; samplesPerSecond = 896.6 +MPI Rank 0: 05/03/2016 13:22:50: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.7105s; samplesPerSecond = 900.8 +MPI Rank 0: 05/03/2016 13:22:50: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.7329s; samplesPerSecond = 873.2 +MPI Rank 0: 05/03/2016 13:22:51: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.7203s; samplesPerSecond = 888.5 +MPI Rank 0: 05/03/2016 13:22:52: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.6972s; samplesPerSecond = 918.0 +MPI Rank 0: 05/03/2016 13:22:52: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.7204s; samplesPerSecond = 888.4 +MPI Rank 0: 05/03/2016 13:22:53: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6947s; samplesPerSecond = 921.2 +MPI Rank 0: 05/03/2016 13:22:54: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6481s; samplesPerSecond = 987.5 +MPI Rank 0: 05/03/2016 13:22:54: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.6051s; samplesPerSecond = 1057.7 +MPI Rank 0: 05/03/2016 13:22:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=22.6802s +MPI Rank 0: 05/03/2016 13:22:55: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:32: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant 
= 607.5 samples +MPI Rank 0: 05/03/2016 13:22:55: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:50:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 0: 05/03/2016 14:59:16: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13368596 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 523.6721s; samplesPerSecond = 4.9 -MPI Rank 0: 05/03/2016 15:03:19: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.03773809 * 2560; EvalErrorPrediction = 0.55898437 * 2560; time = 243.4318s; samplesPerSecond = 10.5 -MPI Rank 0: 05/03/2016 15:08:04: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02936769 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 284.1997s; samplesPerSecond = 9.0 -MPI Rank 0: 05/03/2016 15:11:27: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93929048 * 2560; EvalErrorPrediction = 0.53593750 * 2560; time = 203.6331s; samplesPerSecond = 12.6 -MPI Rank 0: 05/03/2016 15:13:45: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.92419932 * 2560; EvalErrorPrediction = 0.52617187 * 2560; time = 137.3139s; samplesPerSecond = 18.6 -MPI Rank 0: 05/03/2016 15:19:12: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94695921 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 326.9489s; samplesPerSecond = 7.8 -MPI Rank 0: 05/03/2016 15:21:36: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94673081 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 144.1827s; samplesPerSecond = 17.8 -MPI Rank 0: 05/03/2016 15:25:27: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.91211204 * 2560; EvalErrorPrediction = 0.53945312 * 2560; time = 231.3784s; samplesPerSecond = 11.1 -MPI Rank 0: 05/03/2016 15:25:30: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98376045 * 20480; EvalErrorPrediction = 0.54653320 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=2097.82s -MPI Rank 0: 05/03/2016 15:25:30: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn.2' +MPI Rank 0: 05/03/2016 13:22:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 0: 05/03/2016 13:31:56: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13368596 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 541.5354s; samplesPerSecond = 4.7 +MPI Rank 0: 05/03/2016 13:41:11: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.03773809 * 2560; EvalErrorPrediction = 0.55898437 * 2560; time = 554.4157s; samplesPerSecond = 4.6 +MPI Rank 0: 05/03/2016 13:48:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02936769 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 409.5203s; samplesPerSecond = 6.3 +MPI Rank 0: 05/03/2016 13:54:08: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93929048 * 2560; EvalErrorPrediction = 0.53593750 * 2560; time = 367.7873s; samplesPerSecond = 7.0 +MPI Rank 0: 05/03/2016 13:57:22: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.92419932 * 2560; EvalErrorPrediction = 0.52617187 * 2560; time = 194.0593s; samplesPerSecond = 13.2 +MPI Rank 0: 05/03/2016 14:05:15: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94695921 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 472.6057s; samplesPerSecond = 5.4 +MPI Rank 0: 05/03/2016 14:13:19: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94673081 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 484.6714s; samplesPerSecond = 5.3 +MPI Rank 0: 05/03/2016 14:17:49: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.91211204 * 2560; EvalErrorPrediction = 0.53945312 * 2560; time = 269.7641s; samplesPerSecond = 9.5 +MPI Rank 0: 05/03/2016 14:17:52: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98376045 * 20480; EvalErrorPrediction = 0.54653320 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=3297.57s +MPI Rank 0: 05/03/2016 14:17:53: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 15:25:30: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:17:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:25:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 15:43:13: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.92705928 * 10240; EvalErrorPrediction = 0.54765625 * 10240; time = 1062.5932s; samplesPerSecond = 9.6 -MPI Rank 0: 05/03/2016 15:52:05: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90745194 * 10240; EvalErrorPrediction = 0.52822266 * 10240; time = 532.4809s; samplesPerSecond = 19.2 -MPI Rank 0: 05/03/2016 15:52:09: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91725561 * 20480; EvalErrorPrediction = 0.53793945 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1598.29s -MPI Rank 0: 05/03/2016 15:52:09: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 15:52:09: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 14:17:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 14:36:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.92705928 * 10240; EvalErrorPrediction = 0.54765625 * 10240; time = 1098.5768s; samplesPerSecond = 9.3 +MPI Rank 0: 05/03/2016 14:45:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90745194 * 10240; EvalErrorPrediction = 0.52822266 * 10240; time = 584.9987s; samplesPerSecond = 17.5 +MPI Rank 0: 05/03/2016 14:45:58: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91725561 * 20480; EvalErrorPrediction = 0.53793945 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1685.83s +MPI Rank 0: 05/03/2016 14:45:58: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 14:45:58: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:09: Action "train" complete. +MPI Rank 0: 05/03/2016 14:45:58: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:09: __COMPLETED__ -MPI Rank 1: 05/03/2016 14:50:07: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank1 -MPI Rank 1: 05/03/2016 14:50:07: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 14:50:07: Build info: +MPI Rank 0: 05/03/2016 14:45:58: __COMPLETED__ +MPI Rank 1: 05/03/2016 13:22:28: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank1 +MPI Rank 1: 05/03/2016 13:22:28: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 13:22:28: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: Built time: May 3 2016 13:15:46 -MPI Rank 1: 05/03/2016 14:50:07: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 1: 05/03/2016 14:50:07: Build type: Release -MPI Rank 1: 05/03/2016 14:50:07: Build target: GPU -MPI Rank 1: 05/03/2016 14:50:07: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 14:50:07: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 05/03/2016 14:50:07: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 1: 05/03/2016 14:50:07: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 05/03/2016 14:50:07: Build Branch: HEAD -MPI Rank 1: 05/03/2016 14:50:07: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 1: 05/03/2016 14:50:07: Built by svcphil on cntk-muc01 -MPI Rank 1: 05/03/2016 14:50:07: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 05/03/2016 14:50:07: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 13:22:28: Built time: May 3 2016 13:15:46 +MPI Rank 1: 05/03/2016 13:22:28: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 1: 05/03/2016 13:22:28: Build type: Release +MPI Rank 1: 05/03/2016 13:22:28: Build target: GPU +MPI Rank 1: 05/03/2016 13:22:28: With 1bit-SGD: no +MPI Rank 1: 05/03/2016 13:22:28: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 1: 05/03/2016 13:22:28: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 1: 05/03/2016 13:22:28: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 1: 05/03/2016 13:22:28: Build Branch: HEAD +MPI Rank 1: 05/03/2016 13:22:28: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 1: 05/03/2016 13:22:28: Built by svcphil on cntk-muc01 +MPI Rank 1: 05/03/2016 13:22:28: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 1: 05/03/2016 13:22:28: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: Running on cntk-muc01 at 2016/05/03 14:50:07 -MPI Rank 1: 05/03/2016 14:50:07: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 1: 05/03/2016 13:22:28: Running on cntk-muc01 at 2016/05/03 13:22:28 +MPI Rank 1: 05/03/2016 13:22:28: Command line: +MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 14:50:07: precision = "float" +MPI Rank 1: 05/03/2016 13:22:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 13:22:28: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -742,30 +737,28 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: 
stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 13:22:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 14:50:07: precision = "float" +MPI Rank 1: 05/03/2016 13:22:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 13:22:28: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = -1 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -849,36 +842,34 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 13:22:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 13:22:28: 
>>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:deviceId=-1 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -960,35 +951,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 14:50:07: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 14:50:07: Commands: speechTrain -MPI Rank 1: 05/03/2016 14:50:07: Precision = "double" -MPI Rank 1: 05/03/2016 14:50:07: Using 2 CPU threads. 
-MPI Rank 1: 05/03/2016 14:50:07: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 14:50:07: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 1: 05/03/2016 14:50:07: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 1: 05/03/2016 13:22:28: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 13:22:28: Commands: speechTrain +MPI Rank 1: 05/03/2016 13:22:28: Precision = "double" +MPI Rank 1: 05/03/2016 13:22:28: Using 2 CPU threads. +MPI Rank 1: 05/03/2016 13:22:28: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 13:22:28: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 1: 05/03/2016 13:22:28: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: ############################################################################## -MPI Rank 1: 05/03/2016 14:50:07: # # -MPI Rank 1: 05/03/2016 14:50:07: # Action "train" # -MPI Rank 1: 05/03/2016 14:50:07: # # -MPI Rank 1: 05/03/2016 14:50:07: ############################################################################## +MPI Rank 1: 05/03/2016 13:22:28: ############################################################################## +MPI Rank 1: 05/03/2016 13:22:28: # # +MPI Rank 1: 05/03/2016 13:22:28: # Action "train" # +MPI Rank 1: 05/03/2016 13:22:28: # # +MPI Rank 1: 05/03/2016 13:22:28: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 13:22:28: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: Creating virgin network. +MPI Rank 1: 05/03/2016 13:22:29: Creating virgin network. MPI Rank 1: MPI Rank 1: Post-processing network... MPI Rank 1: @@ -1040,14 +1030,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: Created model with 25 nodes on CPU. 
+MPI Rank 1: 05/03/2016 13:22:29: Created model with 25 nodes on CPU. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: Training criterion node(s): -MPI Rank 1: 05/03/2016 14:50:07: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 13:22:29: Training criterion node(s): +MPI Rank 1: 05/03/2016 13:22:29: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 13:22:29: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 13:22:29: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1055,132 +1045,137 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 00000023B30BCAF0: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 00000023B30BD3B0: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 00000023B30BD450: {[W0 Value[512 x 363]] } -MPI Rank 1: 00000023B30BD630: {[features Value[363 x *]] } -MPI Rank 1: 00000023B30BD770: {[B0 Value[512 x 1]] } -MPI Rank 1: 00000023BBF23ED0: {[W2 Value[132 x 512]] } -MPI Rank 1: 00000023BBF24010: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 00000023BBF241F0: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 00000023BBF24330: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 00000023BBF245B0: {[labels Value[132 x *]] } -MPI Rank 1: 00000023BBF24830: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 00000023BBF24A10: {[Prior Value[132]] } -MPI Rank 1: 00000023BBF24BF0: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 00000023BBF24C90: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 00000023BBF24D30: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 00000023BBF24E70: {[B2 Value[132 x 1]] } -MPI Rank 1: 00000023BBF25230: {[LogOfPrior Value[132]] } -MPI Rank 1: 00000023BBF252D0: {[W0*features Value[512 x *]] } -MPI Rank 1: 00000023BBF25370: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 00000023BBF25410: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 00000023BBF254B0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 00000023BBF255F0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 00000023BBF25690: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 00000023BBF25730: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 00000023BBF25AF0: {[W1 Value[512 x 512]] } -MPI Rank 1: 00000023BBF25B90: {[B1 Value[512 x 1]] } -MPI Rank 1: 00000023BBF25D70: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 00000023BBF26E80: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 0000008599E01660: {[features Value[363 x *]] } +MPI Rank 1: 0000008599E01980: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 0000008599E01AC0: {[W0 Value[512 x 363]] } +MPI Rank 1: 0000008599E01CA0: {[B0 Value[512 x 1]] } +MPI Rank 1: 0000008599E01F20: 
{[InvStdOfFeatures Value[363]] } +MPI Rank 1: 0000008599E16100: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 0000008599E161A0: {[W0*features Value[512 x *]] } +MPI Rank 1: 0000008599E16240: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 0000008599E16380: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 0000008599E16420: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 0000008599E16560: {[W2 Value[132 x 512]] } +MPI Rank 1: 0000008599E167E0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 0000008599E169C0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 0000008599E16A60: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 0000008599E16B00: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 0000008599E16EC0: {[B1 Value[512 x 1]] } +MPI Rank 1: 0000008599E170A0: {[B2 Value[132 x 1]] } +MPI Rank 1: 0000008599E171E0: {[W1 Value[512 x 512]] } +MPI Rank 1: 0000008599E173C0: {[Prior Value[132]] } +MPI Rank 1: 0000008599E17460: {[LogOfPrior Value[132]] } +MPI Rank 1: 0000008599E17500: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 0000008599E17640: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 0000008599E176E0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 0000008599E17960: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 0000008599E17A00: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 0000008599E17BE0: {[labels Value[132 x *]] } +MPI Rank 1: 0000008599E34360: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 0000008599E35DA0: {[W2*H1 Gradient[132 x 1 x *]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 13:22:29: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:07: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 14:50:07: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 14:50:07: Prior = Mean() +MPI Rank 1: 05/03/2016 13:22:29: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 13:22:29: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 13:22:29: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:09: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 13:22:32: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:09: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 13:22:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:09: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 14:50:10: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.8098s; samplesPerSecond = 790.4 -MPI Rank 1: 05/03/2016 14:50:11: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.6936s; samplesPerSecond = 922.8 -MPI Rank 1: 05/03/2016 14:50:12: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6450s; samplesPerSecond = 992.3 -MPI Rank 1: 05/03/2016 14:50:12: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6731s; samplesPerSecond = 950.9 -MPI Rank 1: 05/03/2016 14:50:13: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6608s; samplesPerSecond = 968.5 -MPI Rank 1: 05/03/2016 14:50:14: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.6717s; samplesPerSecond = 952.7 -MPI Rank 1: 05/03/2016 14:50:14: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.6555s; samplesPerSecond = 976.3 -MPI Rank 1: 05/03/2016 14:50:15: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.6733s; samplesPerSecond = 950.6 -MPI Rank 1: 05/03/2016 14:50:16: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.6404s; samplesPerSecond = 999.4 -MPI Rank 1: 05/03/2016 14:50:16: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.7361s; samplesPerSecond = 869.4 -MPI Rank 1: 05/03/2016 14:50:17: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.7217s; samplesPerSecond = 886.8 -MPI Rank 1: 05/03/2016 14:50:18: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.6634s; samplesPerSecond = 964.7 -MPI Rank 1: 05/03/2016 14:50:18: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.7052s; samplesPerSecond = 907.6 -MPI Rank 1: 05/03/2016 14:50:19: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.6943s; samplesPerSecond = 921.8 -MPI Rank 1: 05/03/2016 14:50:20: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.6173s; samplesPerSecond = 1036.8 -MPI Rank 1: 05/03/2016 14:50:20: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.6322s; samplesPerSecond = 1012.3 -MPI Rank 1: 05/03/2016 14:50:21: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.6298s; samplesPerSecond = 1016.2 -MPI Rank 1: 05/03/2016 14:50:22: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.6121s; samplesPerSecond = 1045.6 -MPI Rank 1: 05/03/2016 14:50:22: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6290s; samplesPerSecond = 1017.4 -MPI Rank 1: 05/03/2016 14:50:23: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.6397s; samplesPerSecond = 1000.5 -MPI Rank 1: 05/03/2016 14:50:24: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.6454s; samplesPerSecond = 991.6 -MPI Rank 1: 05/03/2016 14:50:24: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6318s; samplesPerSecond = 1012.9 -MPI Rank 1: 05/03/2016 14:50:25: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.6392s; samplesPerSecond = 1001.3 -MPI Rank 1: 05/03/2016 14:50:25: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.6720s; samplesPerSecond = 952.4 -MPI Rank 1: 05/03/2016 14:50:26: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.6593s; samplesPerSecond = 970.8 -MPI Rank 1: 05/03/2016 14:50:27: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6411s; samplesPerSecond = 998.3 -MPI Rank 1: 05/03/2016 14:50:27: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.6778s; samplesPerSecond = 944.2 -MPI Rank 1: 05/03/2016 14:50:28: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.6706s; samplesPerSecond = 954.3 -MPI Rank 1: 05/03/2016 14:50:29: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.6587s; samplesPerSecond = 971.6 -MPI Rank 1: 05/03/2016 14:50:29: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6918s; samplesPerSecond = 925.1 -MPI Rank 1: 05/03/2016 14:50:30: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.7368s; samplesPerSecond = 868.6 -MPI Rank 1: 05/03/2016 14:50:31: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.7108s; samplesPerSecond = 900.4 -MPI Rank 1: 05/03/2016 14:50:31: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=21.5578s +MPI Rank 1: 05/03/2016 13:22:32: Starting minibatch loop. 
+MPI Rank 1: 05/03/2016 13:22:32: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.7625s; samplesPerSecond = 839.4 +MPI Rank 1: 05/03/2016 13:22:33: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.7673s; samplesPerSecond = 834.1 +MPI Rank 1: 05/03/2016 13:22:34: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.7499s; samplesPerSecond = 853.4 +MPI Rank 1: 05/03/2016 13:22:35: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.7636s; samplesPerSecond = 838.1 +MPI Rank 1: 05/03/2016 13:22:35: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.7485s; samplesPerSecond = 855.0 +MPI Rank 1: 05/03/2016 13:22:36: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.7642s; samplesPerSecond = 837.5 +MPI Rank 1: 05/03/2016 13:22:37: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.7354s; samplesPerSecond = 870.2 +MPI Rank 1: 05/03/2016 13:22:38: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.7195s; samplesPerSecond = 889.5 +MPI Rank 1: 05/03/2016 13:22:38: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.7305s; samplesPerSecond = 876.1 +MPI Rank 1: 05/03/2016 13:22:39: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.7650s; samplesPerSecond = 836.5 +MPI Rank 1: 05/03/2016 13:22:40: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.7467s; samplesPerSecond = 857.1 +MPI Rank 1: 05/03/2016 13:22:41: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.7897s; samplesPerSecond = 810.4 +MPI Rank 1: 05/03/2016 13:22:41: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.7444s; samplesPerSecond = 859.8 +MPI Rank 1: 05/03/2016 13:22:42: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.7452s; samplesPerSecond = 858.8 +MPI Rank 1: 05/03/2016 13:22:43: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.7058s; samplesPerSecond = 906.8 +MPI Rank 1: 05/03/2016 13:22:44: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.7217s; samplesPerSecond = 886.8 +MPI Rank 1: 05/03/2016 13:22:44: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.7888s; samplesPerSecond = 811.4 +MPI Rank 1: 05/03/2016 13:22:45: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.7427s; samplesPerSecond = 861.7 +MPI Rank 1: 05/03/2016 13:22:46: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7223s; samplesPerSecond = 886.1 +MPI Rank 1: 05/03/2016 13:22:47: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.7241s; samplesPerSecond = 883.8 +MPI Rank 1: 05/03/2016 13:22:47: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.7480s; samplesPerSecond = 855.6 +MPI Rank 1: 05/03/2016 13:22:48: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7535s; samplesPerSecond = 849.4 +MPI Rank 1: 05/03/2016 13:22:49: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.7494s; samplesPerSecond = 854.1 +MPI Rank 1: 05/03/2016 13:22:50: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.7593s; samplesPerSecond = 842.9 +MPI Rank 1: 05/03/2016 13:22:50: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.7290s; samplesPerSecond = 877.9 +MPI Rank 1: 05/03/2016 13:22:51: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.7203s; samplesPerSecond = 888.5 +MPI Rank 1: 05/03/2016 13:22:52: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.7572s; samplesPerSecond = 845.2 +MPI Rank 1: 05/03/2016 13:22:53: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.7505s; samplesPerSecond = 852.8 +MPI Rank 1: 05/03/2016 13:22:53: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.7677s; samplesPerSecond = 833.7 +MPI Rank 1: 05/03/2016 13:22:54: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.7958s; samplesPerSecond = 804.2 +MPI Rank 1: 05/03/2016 13:22:55: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.3579s; samplesPerSecond = 1788.1 +MPI Rank 1: 05/03/2016 13:22:55: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.2411s; samplesPerSecond = 2655.0 +MPI Rank 1: 05/03/2016 13:22:55: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=23.0846s MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:32: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 13:22:55: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 
minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:50:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 14:59:16: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13368596 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 523.6819s; samplesPerSecond = 4.9 -MPI Rank 1: 05/03/2016 15:03:19: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.03773809 * 2560; EvalErrorPrediction = 0.55898437 * 2560; time = 243.4377s; samplesPerSecond = 10.5 -MPI Rank 1: 05/03/2016 15:08:04: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02936769 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 284.1961s; samplesPerSecond = 9.0 -MPI Rank 1: 05/03/2016 15:11:27: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93929048 * 2560; EvalErrorPrediction = 0.53593750 * 2560; time = 203.6337s; samplesPerSecond = 12.6 -MPI Rank 1: 05/03/2016 15:13:45: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.92419932 * 2560; EvalErrorPrediction = 0.52617187 * 2560; time = 137.3129s; samplesPerSecond = 18.6 -MPI Rank 1: 05/03/2016 15:19:12: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94695921 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 326.9499s; samplesPerSecond = 7.8 -MPI Rank 1: 05/03/2016 15:21:36: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94673081 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 144.1820s; samplesPerSecond = 17.8 -MPI Rank 1: 05/03/2016 15:25:27: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.91211204 * 2560; EvalErrorPrediction = 0.53945312 * 2560; time = 231.3833s; samplesPerSecond = 11.1 -MPI Rank 1: 05/03/2016 15:25:30: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98376045 * 20480; EvalErrorPrediction = 0.54653320 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=2097.83s +MPI Rank 1: 05/03/2016 13:22:55: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 13:31:56: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13368596 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 541.5471s; samplesPerSecond = 4.7 +MPI Rank 1: 05/03/2016 13:41:11: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.03773809 * 2560; EvalErrorPrediction = 0.55898437 * 2560; time = 554.4160s; samplesPerSecond = 4.6 +MPI Rank 1: 05/03/2016 13:48:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02936769 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 409.5195s; samplesPerSecond = 6.3 +MPI Rank 1: 05/03/2016 13:54:08: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93929048 * 2560; EvalErrorPrediction = 0.53593750 * 2560; time = 367.7872s; samplesPerSecond = 7.0 +MPI Rank 1: 05/03/2016 13:57:22: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.92419932 * 2560; EvalErrorPrediction = 0.52617187 * 2560; time = 194.0591s; samplesPerSecond = 13.2 +MPI Rank 1: 05/03/2016 14:05:15: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94695921 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 472.6055s; samplesPerSecond = 5.4 +MPI Rank 1: 05/03/2016 14:13:20: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94673081 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 484.6720s; samplesPerSecond = 5.3 +MPI Rank 1: 05/03/2016 14:17:50: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.91211204 * 2560; EvalErrorPrediction = 0.53945312 * 2560; time = 270.8972s; samplesPerSecond = 9.5 +MPI Rank 1: 05/03/2016 14:17:52: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98376045 * 20480; EvalErrorPrediction = 0.54653320 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=3297.58s MPI Rank 1: -MPI Rank 1: 05/03/2016 15:25:30: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 14:17:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:25:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 15:43:13: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.92705928 * 10240; EvalErrorPrediction = 0.54765625 * 10240; time = 1062.6132s; samplesPerSecond = 9.6 -MPI Rank 1: 05/03/2016 15:52:06: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90745194 * 10240; EvalErrorPrediction = 0.52822266 * 10240; time = 533.6150s; samplesPerSecond = 19.2 -MPI Rank 1: 05/03/2016 15:52:09: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91725561 * 20480; EvalErrorPrediction = 0.53793945 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1598.31s -MPI Rank 1: 05/03/2016 15:52:09: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 14:17:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 14:36:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.92705928 * 10240; EvalErrorPrediction = 0.54765625 * 10240; time = 1098.5950s; samplesPerSecond = 9.3 +MPI Rank 1: 05/03/2016 14:45:57: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90745194 * 10240; EvalErrorPrediction = 0.52822266 * 10240; time = 585.3804s; samplesPerSecond = 17.5 +MPI Rank 1: 05/03/2016 14:45:58: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91725561 * 20480; EvalErrorPrediction = 0.53793945 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1685.85s +MPI Rank 1: 05/03/2016 14:45:58: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:09: Action "train" complete. +MPI Rank 1: 05/03/2016 14:45:58: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:09: __COMPLETED__ -MPI Rank 2: 05/03/2016 14:50:07: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank2 -MPI Rank 2: 05/03/2016 14:50:07: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 14:50:07: Build info: +MPI Rank 1: 05/03/2016 14:45:58: __COMPLETED__ +MPI Rank 2: 05/03/2016 13:22:28: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank2 +MPI Rank 2: 05/03/2016 13:22:28: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 13:22:28: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:07: Built time: May 3 2016 13:15:46 -MPI Rank 2: 05/03/2016 14:50:07: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 2: 05/03/2016 14:50:07: Build type: Release -MPI Rank 2: 05/03/2016 14:50:07: Build target: GPU -MPI Rank 2: 05/03/2016 14:50:07: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 14:50:07: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 05/03/2016 14:50:07: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 2: 05/03/2016 14:50:07: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 05/03/2016 14:50:07: Build Branch: HEAD -MPI Rank 2: 05/03/2016 14:50:07: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 2: 05/03/2016 14:50:07: Built by svcphil on cntk-muc01 -MPI Rank 2: 05/03/2016 14:50:07: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 05/03/2016 14:50:07: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 13:22:28: Built time: May 3 2016 13:15:46 +MPI Rank 2: 05/03/2016 13:22:28: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 2: 05/03/2016 13:22:28: Build type: Release +MPI Rank 2: 05/03/2016 13:22:28: Build target: GPU +MPI Rank 2: 05/03/2016 13:22:28: With 1bit-SGD: no +MPI Rank 2: 05/03/2016 13:22:28: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 2: 05/03/2016 13:22:28: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 2: 05/03/2016 13:22:28: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 2: 05/03/2016 13:22:28: Build Branch: HEAD +MPI Rank 2: 05/03/2016 13:22:28: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 2: 05/03/2016 13:22:28: Built by svcphil on cntk-muc01 +MPI Rank 2: 05/03/2016 13:22:28: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 2: 05/03/2016 13:22:28: 
------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:07: Running on cntk-muc01 at 2016/05/03 14:50:07 -MPI Rank 2: 05/03/2016 14:50:07: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 2: 05/03/2016 13:22:28: Running on cntk-muc01 at 2016/05/03 13:22:28 +MPI Rank 2: 05/03/2016 13:22:28: Command line: +MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:07: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 14:50:07: precision = "float" +MPI Rank 2: 05/03/2016 13:22:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 13:22:28: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1270,30 +1265,28 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: 
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:07: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 13:22:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:07: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 14:50:07: precision = "float" +MPI Rank 2: 05/03/2016 13:22:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 13:22:28: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = -1 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1377,36 +1370,34 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 2: 
speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:07: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 13:22:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:07: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 13:22:28: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:deviceId=-1 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1488,35 +1479,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/stderr +MPI Rank 2: configparameters: 
cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 14:50:07: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 14:50:07: Commands: speechTrain -MPI Rank 2: 05/03/2016 14:50:07: Precision = "double" -MPI Rank 2: 05/03/2016 14:50:08: Using 2 CPU threads. -MPI Rank 2: 05/03/2016 14:50:08: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 14:50:08: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 2: 05/03/2016 14:50:08: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 2: 05/03/2016 13:22:28: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 13:22:28: Commands: speechTrain +MPI Rank 2: 05/03/2016 13:22:28: Precision = "double" +MPI Rank 2: 05/03/2016 13:22:28: Using 2 CPU threads. +MPI Rank 2: 05/03/2016 13:22:28: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 13:22:28: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 2: 05/03/2016 13:22:28: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:08: ############################################################################## -MPI Rank 2: 05/03/2016 14:50:08: # # -MPI Rank 2: 05/03/2016 14:50:08: # Action "train" # -MPI Rank 2: 05/03/2016 14:50:08: # # -MPI Rank 2: 05/03/2016 14:50:08: ############################################################################## +MPI Rank 2: 05/03/2016 13:22:28: ############################################################################## +MPI Rank 2: 05/03/2016 13:22:28: # # +MPI Rank 2: 05/03/2016 13:22:28: # Action "train" # +MPI Rank 2: 05/03/2016 13:22:28: # # +MPI Rank 2: 05/03/2016 13:22:28: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:08: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 13:22:28: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. 
chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:08: Creating virgin network. +MPI Rank 2: 05/03/2016 13:22:29: Creating virgin network. MPI Rank 2: MPI Rank 2: Post-processing network... MPI Rank 2: @@ -1568,14 +1558,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:08: Created model with 25 nodes on CPU. +MPI Rank 2: 05/03/2016 13:22:29: Created model with 25 nodes on CPU. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:08: Training criterion node(s): -MPI Rank 2: 05/03/2016 14:50:08: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 13:22:29: Training criterion node(s): +MPI Rank 2: 05/03/2016 13:22:29: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:08: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 13:22:29: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:08: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 13:22:29: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1583,103 +1573,108 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 000000056DADCC30: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 000000056DADCCD0: {[B0 Value[512 x 1]] } -MPI Rank 2: 000000056DADD3B0: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 000000056DADD450: {[W0 Value[512 x 363]] } -MPI Rank 2: 000000056DADD630: {[features Value[363 x *]] } -MPI Rank 2: 000000056DB0B360: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 000000056DB92970: {[W2 Value[132 x 512]] } -MPI Rank 2: 000000056DB92B50: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 000000056DB92BF0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 000000056DB92C90: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 000000056DB92DD0: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 000000056DB92E70: {[W1 Value[512 x 512]] } -MPI Rank 2: 000000056DB92FB0: {[B1 Value[512 x 1]] } -MPI Rank 2: 000000056DB93190: {[LogOfPrior Value[132]] } -MPI Rank 2: 000000056DB93230: {[W0*features Value[512 x *]] } -MPI Rank 2: 000000056DB932D0: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 000000056DB93690: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 000000056DB93730: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 000000056DB93910: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 000000056DB939B0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 000000056DB93B90: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 000000056DB93E10: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 000000056DB93F50: {[B2 Value[132 x 1]] } -MPI Rank 2: 000000056DB94090: {[Prior Value[132]] } -MPI Rank 2: 000000056DB941D0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 
000000056DB943B0: {[labels Value[132 x *]] } -MPI Rank 2: 000000056DB944F0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 000000056DB946D0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 000000D12CFB13F0: {[features Value[363 x *]] } +MPI Rank 2: 000000D12CFB1670: {[W0 Value[512 x 363]] } +MPI Rank 2: 000000D12CFB1850: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 000000D12CFB1F30: {[B0 Value[512 x 1]] } +MPI Rank 2: 000000D12CFB21B0: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 000000D1363BCAF0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 000000D1363BCE10: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 000000D1363DA9F0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 000000D1363DAB30: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 000000D1363DADB0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 000000D1363DAE50: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 000000D1363DB030: {[W2 Value[132 x 512]] } +MPI Rank 2: 000000D1363DB0D0: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 000000D1363DB170: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 000000D1363DB490: {[B2 Value[132 x 1]] } +MPI Rank 2: 000000D1363DB530: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 000000D1363DB5D0: {[W0*features Value[512 x *]] } +MPI Rank 2: 000000D1363DB670: {[labels Value[132 x *]] } +MPI Rank 2: 000000D1363DB850: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 000000D1363DB8F0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 000000D1363DBAD0: {[Prior Value[132]] } +MPI Rank 2: 000000D1363DBB70: {[W1 Value[512 x 512]] } +MPI Rank 2: 000000D1363DBC10: {[B1 Value[512 x 1]] } +MPI Rank 2: 000000D1363DBDF0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 000000D1363DBFD0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 000000D1363DC2F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 000000D1363DC750: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 000000D1363DC890: {[LogOfPrior Value[132]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:08: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 13:22:29: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:08: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 14:50:08: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 14:50:08: Prior = Mean() +MPI Rank 2: 05/03/2016 13:22:29: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 13:22:29: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 13:22:29: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:09: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 13:22:31: Precomputing --> Completed. 
MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:09: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 13:22:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:09: Starting minibatch loop. -MPI Rank 2: 05/03/2016 14:50:10: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.9230s; samplesPerSecond = 693.4 -MPI Rank 2: 05/03/2016 14:50:11: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.7133s; samplesPerSecond = 897.3 -MPI Rank 2: 05/03/2016 14:50:12: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6955s; samplesPerSecond = 920.2 -MPI Rank 2: 05/03/2016 14:50:12: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6857s; samplesPerSecond = 933.4 -MPI Rank 2: 05/03/2016 14:50:13: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6479s; samplesPerSecond = 987.8 -MPI Rank 2: 05/03/2016 14:50:14: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.6541s; samplesPerSecond = 978.5 -MPI Rank 2: 05/03/2016 14:50:14: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.6412s; samplesPerSecond = 998.1 -MPI Rank 2: 05/03/2016 14:50:15: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.6860s; samplesPerSecond = 933.0 -MPI Rank 2: 05/03/2016 14:50:16: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.6258s; samplesPerSecond = 1022.7 -MPI Rank 2: 05/03/2016 14:50:16: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.6546s; samplesPerSecond = 977.7 -MPI Rank 2: 05/03/2016 14:50:17: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.6998s; samplesPerSecond = 914.5 -MPI Rank 2: 05/03/2016 14:50:18: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.7292s; samplesPerSecond = 877.6 -MPI Rank 2: 05/03/2016 14:50:18: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.6904s; samplesPerSecond = 927.1 -MPI Rank 2: 05/03/2016 14:50:19: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.6589s; samplesPerSecond = 971.3 -MPI Rank 2: 05/03/2016 14:50:20: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.6952s; 
samplesPerSecond = 920.6 -MPI Rank 2: 05/03/2016 14:50:21: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.6755s; samplesPerSecond = 947.4 -MPI Rank 2: 05/03/2016 14:50:21: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.6899s; samplesPerSecond = 927.7 -MPI Rank 2: 05/03/2016 14:50:22: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.6761s; samplesPerSecond = 946.6 -MPI Rank 2: 05/03/2016 14:50:23: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6758s; samplesPerSecond = 947.0 -MPI Rank 2: 05/03/2016 14:50:23: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.6857s; samplesPerSecond = 933.3 -MPI Rank 2: 05/03/2016 14:50:24: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.6692s; samplesPerSecond = 956.3 -MPI Rank 2: 05/03/2016 14:50:25: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6850s; samplesPerSecond = 934.3 -MPI Rank 2: 05/03/2016 14:50:25: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.7011s; samplesPerSecond = 912.9 -MPI Rank 2: 05/03/2016 14:50:26: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.6828s; samplesPerSecond = 937.4 -MPI Rank 2: 05/03/2016 14:50:27: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.6710s; samplesPerSecond = 953.8 -MPI Rank 2: 05/03/2016 14:50:27: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6793s; samplesPerSecond = 942.1 -MPI Rank 2: 05/03/2016 14:50:28: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.6619s; samplesPerSecond = 966.9 -MPI Rank 2: 05/03/2016 14:50:29: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.7020s; samplesPerSecond = 911.7 -MPI Rank 2: 05/03/2016 14:50:29: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.7083s; samplesPerSecond = 903.6 -MPI Rank 2: 05/03/2016 14:50:30: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6878s; samplesPerSecond = 930.5 -MPI Rank 2: 05/03/2016 14:50:31: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6923s; samplesPerSecond = 924.5 -MPI Rank 2: 05/03/2016 14:50:31: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.5149s; samplesPerSecond = 1242.9 -MPI Rank 2: 05/03/2016 14:50:31: Finished Epoch[ 1 of 3]: 
[Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=21.9516s +MPI Rank 2: 05/03/2016 13:22:32: Starting minibatch loop. +MPI Rank 2: 05/03/2016 13:22:32: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.6684s; samplesPerSecond = 957.5 +MPI Rank 2: 05/03/2016 13:22:33: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.7143s; samplesPerSecond = 896.0 +MPI Rank 2: 05/03/2016 13:22:34: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.7016s; samplesPerSecond = 912.3 +MPI Rank 2: 05/03/2016 13:22:34: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.7022s; samplesPerSecond = 911.4 +MPI Rank 2: 05/03/2016 13:22:35: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6733s; samplesPerSecond = 950.5 +MPI Rank 2: 05/03/2016 13:22:36: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.6912s; samplesPerSecond = 926.0 +MPI Rank 2: 05/03/2016 13:22:37: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.7058s; samplesPerSecond = 906.8 +MPI Rank 2: 05/03/2016 13:22:37: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.6903s; samplesPerSecond = 927.1 +MPI Rank 2: 05/03/2016 13:22:38: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.7189s; samplesPerSecond = 890.3 +MPI Rank 2: 05/03/2016 13:22:39: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.6956s; samplesPerSecond = 920.1 +MPI Rank 2: 05/03/2016 13:22:39: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.6890s; samplesPerSecond = 928.8 +MPI Rank 2: 05/03/2016 13:22:40: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.6941s; samplesPerSecond = 922.1 +MPI Rank 2: 05/03/2016 13:22:41: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.7152s; samplesPerSecond = 894.9 +MPI Rank 2: 05/03/2016 13:22:41: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.6830s; samplesPerSecond = 937.1 +MPI Rank 2: 05/03/2016 13:22:42: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.7013s; samplesPerSecond = 912.6 +MPI Rank 2: 05/03/2016 13:22:43: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.7140s; samplesPerSecond = 896.4 +MPI Rank 2: 05/03/2016 13:22:44: Epoch[ 1 of 3]-Minibatch[ 161- 170, 
53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.7445s; samplesPerSecond = 859.7 +MPI Rank 2: 05/03/2016 13:22:44: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.6746s; samplesPerSecond = 948.7 +MPI Rank 2: 05/03/2016 13:22:45: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6833s; samplesPerSecond = 936.7 +MPI Rank 2: 05/03/2016 13:22:46: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.7173s; samplesPerSecond = 892.3 +MPI Rank 2: 05/03/2016 13:22:46: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.7240s; samplesPerSecond = 884.0 +MPI Rank 2: 05/03/2016 13:22:47: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7104s; samplesPerSecond = 901.0 +MPI Rank 2: 05/03/2016 13:22:48: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.7033s; samplesPerSecond = 909.9 +MPI Rank 2: 05/03/2016 13:22:49: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.7111s; samplesPerSecond = 900.1 +MPI Rank 2: 05/03/2016 13:22:49: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.6942s; samplesPerSecond = 921.9 +MPI Rank 2: 05/03/2016 13:22:50: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6778s; samplesPerSecond = 944.3 +MPI Rank 2: 05/03/2016 13:22:51: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.7162s; samplesPerSecond = 893.6 +MPI Rank 2: 05/03/2016 13:22:51: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.7206s; samplesPerSecond = 888.1 +MPI Rank 2: 05/03/2016 13:22:52: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.7025s; samplesPerSecond = 911.0 +MPI Rank 2: 05/03/2016 13:22:53: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6966s; samplesPerSecond = 918.8 +MPI Rank 2: 05/03/2016 13:22:53: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6946s; samplesPerSecond = 921.4 +MPI Rank 2: 05/03/2016 13:22:54: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.6679s; samplesPerSecond = 958.2 +MPI Rank 2: 05/03/2016 13:22:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=22.4137s MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:32: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 
0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 13:22:55: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:50:32: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 14:59:16: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13368596 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 523.6867s; samplesPerSecond = 4.9 -MPI Rank 2: 05/03/2016 15:03:19: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.03773809 * 2560; EvalErrorPrediction = 0.55898437 * 2560; time = 243.4243s; samplesPerSecond = 10.5 -MPI Rank 2: 05/03/2016 15:08:04: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02936769 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 284.1994s; samplesPerSecond = 9.0 -MPI Rank 2: 05/03/2016 15:11:27: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93929048 * 2560; EvalErrorPrediction = 0.53593750 * 2560; time = 203.6395s; samplesPerSecond = 12.6 -MPI Rank 2: 05/03/2016 15:13:45: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.92419932 * 2560; EvalErrorPrediction = 0.52617187 * 2560; time = 137.3128s; samplesPerSecond = 18.6 -MPI Rank 2: 05/03/2016 15:19:12: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94695921 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 326.9437s; samplesPerSecond = 7.8 -MPI Rank 2: 05/03/2016 15:21:36: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94673081 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 144.1881s; samplesPerSecond = 17.8 -MPI Rank 2: 05/03/2016 15:25:28: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.91211204 * 2560; EvalErrorPrediction = 0.53945312 * 2560; time = 232.5188s; samplesPerSecond = 11.0 -MPI Rank 2: 05/03/2016 15:25:30: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98376045 * 20480; EvalErrorPrediction = 0.54653320 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=2097.83s +MPI Rank 2: 05/03/2016 13:22:55: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 13:31:56: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13368596 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 541.5413s; samplesPerSecond = 4.7 +MPI Rank 2: 05/03/2016 13:41:11: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.03773809 * 2560; EvalErrorPrediction = 0.55898437 * 2560; time = 554.4157s; samplesPerSecond = 4.6 +MPI Rank 2: 05/03/2016 13:48:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02936769 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 409.5201s; samplesPerSecond = 6.3 +MPI Rank 2: 05/03/2016 13:54:08: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93929048 * 2560; EvalErrorPrediction = 0.53593750 * 2560; time = 367.7871s; samplesPerSecond = 7.0 +MPI Rank 2: 05/03/2016 13:57:22: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.92419932 * 2560; EvalErrorPrediction = 0.52617187 * 2560; time = 194.0660s; samplesPerSecond = 13.2 +MPI Rank 2: 05/03/2016 14:05:15: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94695921 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 472.5988s; samplesPerSecond = 5.4 +MPI Rank 2: 05/03/2016 14:13:20: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94673081 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 484.6784s; samplesPerSecond = 5.3 +MPI Rank 2: 05/03/2016 14:17:50: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.91211204 * 2560; EvalErrorPrediction = 0.53945312 * 2560; time = 270.8817s; samplesPerSecond = 9.5 +MPI Rank 2: 05/03/2016 14:17:52: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98376045 * 20480; EvalErrorPrediction = 0.54653320 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=3297.58s MPI Rank 2: -MPI Rank 2: 05/03/2016 15:25:30: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:17:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:25:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 15:43:13: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.92705928 * 10240; EvalErrorPrediction = 0.54765625 * 10240; time = 1062.6041s; samplesPerSecond = 9.6 -MPI Rank 2: 05/03/2016 15:52:06: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90745194 * 10240; EvalErrorPrediction = 0.52822266 * 10240; time = 533.6060s; samplesPerSecond = 19.2 -MPI Rank 2: 05/03/2016 15:52:09: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91725561 * 20480; EvalErrorPrediction = 0.53793945 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1598.3s -MPI Rank 2: 05/03/2016 15:52:09: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 14:17:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 14:36:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.92705928 * 10240; EvalErrorPrediction = 0.54765625 * 10240; time = 1098.5952s; samplesPerSecond = 9.3 +MPI Rank 2: 05/03/2016 14:45:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90745194 * 10240; EvalErrorPrediction = 0.52822266 * 10240; time = 585.1713s; samplesPerSecond = 17.5 +MPI Rank 2: 05/03/2016 14:45:58: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91725561 * 20480; EvalErrorPrediction = 0.53793945 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1685.84s +MPI Rank 2: 05/03/2016 14:45:58: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:09: Action "train" complete. +MPI Rank 2: 05/03/2016 14:45:58: Action "train" complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:09: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 14:45:58: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt index 46fcabeea..42b50c70c 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt @@ -1,4 +1,4 @@ -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double 
speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr ------------------------------------------------------------------- Build info: @@ -58,20 +58,20 @@ ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) +ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded ping [requestnodes (after change)]: all 3 nodes responded -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 2 in a gearbox of 3 mpihelper: we are cog 1 in a gearbox of 3 +ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 0 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other +mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded @@ -94,32 +94,32 @@ C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe ended prematurely C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe ended prematurely and may have crashed. 
exit code 0 ---- error analysis ----- -MPI Rank 0: 05/03/2016 15:52:11: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank0 -MPI Rank 0: 05/03/2016 15:52:11: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 15:52:11: Build info: +MPI Rank 0: 05/03/2016 14:46:01: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank0 +MPI Rank 0: 05/03/2016 14:46:01: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:46:01: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:11: Built time: May 3 2016 13:15:46 -MPI Rank 0: 05/03/2016 15:52:11: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 0: 05/03/2016 15:52:11: Build type: Release -MPI Rank 0: 05/03/2016 15:52:11: Build target: GPU -MPI Rank 0: 05/03/2016 15:52:11: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 15:52:11: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 05/03/2016 15:52:11: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 0: 05/03/2016 15:52:11: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 05/03/2016 15:52:11: Build Branch: HEAD -MPI Rank 0: 05/03/2016 15:52:11: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 0: 05/03/2016 15:52:11: Built by svcphil on cntk-muc01 -MPI Rank 0: 05/03/2016 15:52:11: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 05/03/2016 15:52:11: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:46:01: Built time: May 3 2016 13:15:46 +MPI Rank 0: 05/03/2016 14:46:01: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 0: 05/03/2016 14:46:01: Build type: Release +MPI Rank 0: 05/03/2016 14:46:01: Build target: GPU +MPI Rank 0: 05/03/2016 14:46:01: With 1bit-SGD: no +MPI Rank 0: 05/03/2016 14:46:01: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 0: 05/03/2016 14:46:01: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 0: 05/03/2016 14:46:01: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 0: 05/03/2016 14:46:01: Build Branch: HEAD +MPI Rank 0: 05/03/2016 14:46:01: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 0: 05/03/2016 14:46:01: Built by svcphil on cntk-muc01 +MPI Rank 0: 05/03/2016 14:46:01: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 0: 05/03/2016 14:46:01: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:11: Running on cntk-muc01 at 2016/05/03 15:52:11 -MPI Rank 0: 05/03/2016 15:52:11: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 
timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 0: 05/03/2016 14:46:01: Running on cntk-muc01 at 2016/05/03 14:46:01 +MPI Rank 0: 05/03/2016 14:46:01: Command line: +MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:11: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 15:52:11: precision = "float" +MPI Rank 0: 05/03/2016 14:46:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:46:01: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -209,30 +209,28 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 
0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:11: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:46:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:11: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 15:52:11: precision = "float" +MPI Rank 0: 05/03/2016 14:46:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:46:01: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = 0 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -316,36 +314,34 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:11: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:46:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:11: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:46:01: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain MPI Rank 0: configparameters: 
cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:deviceId=0 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -427,35 +423,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 15:52:11: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 15:52:11: Commands: speechTrain -MPI Rank 0: 05/03/2016 15:52:11: Precision = "double" -MPI Rank 0: 05/03/2016 15:52:11: Using 2 CPU threads. 
-MPI Rank 0: 05/03/2016 15:52:11: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 15:52:11: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 0: 05/03/2016 15:52:11: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 0: 05/03/2016 14:46:01: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:46:01: Commands: speechTrain +MPI Rank 0: 05/03/2016 14:46:01: Precision = "double" +MPI Rank 0: 05/03/2016 14:46:01: Using 2 CPU threads. +MPI Rank 0: 05/03/2016 14:46:01: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 14:46:01: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 0: 05/03/2016 14:46:01: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:11: ############################################################################## -MPI Rank 0: 05/03/2016 15:52:11: # # -MPI Rank 0: 05/03/2016 15:52:11: # Action "train" # -MPI Rank 0: 05/03/2016 15:52:11: # # -MPI Rank 0: 05/03/2016 15:52:11: ############################################################################## +MPI Rank 0: 05/03/2016 14:46:01: ############################################################################## +MPI Rank 0: 05/03/2016 14:46:01: # # +MPI Rank 0: 05/03/2016 14:46:01: # Action "train" # +MPI Rank 0: 05/03/2016 14:46:01: # # +MPI Rank 0: 05/03/2016 14:46:01: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:11: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 14:46:01: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:11: Creating virgin network. +MPI Rank 0: 05/03/2016 14:46:01: Creating virgin network. MPI Rank 0: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 0: MPI Rank 0: Post-processing network... @@ -508,14 +503,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:12: Created model with 25 nodes on GPU 0. +MPI Rank 0: 05/03/2016 14:46:02: Created model with 25 nodes on GPU 0. MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:12: Training criterion node(s): -MPI Rank 0: 05/03/2016 15:52:12: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 14:46:02: Training criterion node(s): +MPI Rank 0: 05/03/2016 14:46:02: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:12: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 14:46:02: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:12: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 14:46:02: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -523,135 +518,140 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0000000E12297F20: {[B0 Value[512 x 1]] } -MPI Rank 0: 0000000E12297FC0: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0000000E12298240: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 0000000E12298740: {[B1 Value[512 x 1]] } -MPI Rank 0: 0000000E12299280: {[W1 Value[512 x 512]] } -MPI Rank 0: 0000000E12299500: {[W0 Value[512 x 363]] } -MPI Rank 0: 0000000E14DEAE70: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0000000E14DEAF10: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0000000E14DEAFB0: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 0000000E14DEB050: {[LogOfPrior Value[132]] } -MPI Rank 0: 0000000E14DEB230: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0000000E14DEB2D0: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0000000E14DEB370: {[B2 Value[132 x 1]] } -MPI Rank 0: 0000000E14DEB410: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0000000E14DEBB90: {[W2 Value[132 x 512]] } -MPI Rank 0: 0000000E14DEBEB0: {[labels Value[132 x *]] } -MPI Rank 0: 0000000E14DEC270: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0000000E14DEC310: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0000000E14DEC3B0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0000000E14DEC590: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0000000E14DEC630: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 0000000E14DEC810: {[Prior Value[132]] } -MPI Rank 0: 0000000E14DEC950: {[W0*features Value[512 x *]] } -MPI Rank 0: 0000000E14DEC9F0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0000000E14DECA90: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0000000E14DECBD0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0000000E14DECD10: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0000000E72BBE190: {[features Value[363 x *]] } +MPI Rank 0: 000000A5294617B0: {[features Value[363 x *]] } +MPI Rank 0: 000000A547B40490: {[W0 Value[512 x 363]] } +MPI Rank 0: 000000A547B408F0: {[W1 Value[512 x 512]] } +MPI Rank 0: 
000000A547B40990: {[B1 Value[512 x 1]] } +MPI Rank 0: 000000A547B40F30: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 000000A547B41110: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 000000A547B41BB0: {[B0 Value[512 x 1]] } +MPI Rank 0: 000000A54A975590: {[labels Value[132 x *]] } +MPI Rank 0: 000000A54A975630: {[LogOfPrior Value[132]] } +MPI Rank 0: 000000A54A975770: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 000000A54A975810: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 000000A54A9759F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 000000A54A975A90: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 000000A54A975B30: {[B2 Gradient[132 x 1]] } +MPI Rank 0: 000000A54A975C70: {[W0*features Value[512 x *]] } +MPI Rank 0: 000000A54A975EF0: {[W2 Value[132 x 512]] } +MPI Rank 0: 000000A54A975F90: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 000000A54A976030: {[B2 Value[132 x 1]] } +MPI Rank 0: 000000A54A9760D0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 000000A54A976350: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 000000A54A9763F0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 000000A54A9765D0: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 000000A54A976710: {[Prior Value[132]] } +MPI Rank 0: 000000A54A976A30: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 000000A54A976DF0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 000000A54A977110: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 000000A54A9772F0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 000000A54A977390: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:12: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 14:46:02: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:12: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 15:52:12: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 15:52:12: Prior = Mean() +MPI Rank 0: 05/03/2016 14:46:02: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 14:46:02: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 14:46:02: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:16: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 14:46:07: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 14:46:07: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:17: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 15:52:18: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.4708s; samplesPerSecond = 1359.5 -MPI Rank 0: 05/03/2016 15:52:18: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3234s; samplesPerSecond = 1978.7 -MPI Rank 0: 05/03/2016 15:52:18: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3128s; samplesPerSecond = 2045.8 -MPI Rank 0: 05/03/2016 15:52:19: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3136s; samplesPerSecond = 2040.8 -MPI Rank 0: 05/03/2016 15:52:19: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3135s; samplesPerSecond = 2041.6 -MPI Rank 0: 05/03/2016 15:52:19: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3137s; samplesPerSecond = 2040.3 -MPI Rank 0: 05/03/2016 15:52:20: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3128s; samplesPerSecond = 2045.7 -MPI Rank 0: 05/03/2016 15:52:20: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3135s; samplesPerSecond = 2041.4 -MPI Rank 0: 05/03/2016 15:52:20: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3139s; samplesPerSecond = 2039.0 -MPI Rank 0: 05/03/2016 15:52:21: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3026s; samplesPerSecond = 2115.0 -MPI Rank 0: 05/03/2016 15:52:21: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3226s; samplesPerSecond = 1983.6 -MPI Rank 0: 05/03/2016 15:52:21: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3143s; samplesPerSecond = 2036.0 -MPI Rank 0: 05/03/2016 15:52:21: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3124s; samplesPerSecond = 2048.7 -MPI Rank 0: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3021s; samplesPerSecond = 2118.6 -MPI Rank 0: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3134s; samplesPerSecond = 2042.3 -MPI Rank 0: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3121s; samplesPerSecond = 2050.3 -MPI Rank 0: 05/03/2016 15:52:23: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3034s; samplesPerSecond = 2109.4 -MPI Rank 0: 05/03/2016 15:52:23: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3332s; samplesPerSecond = 1920.6 -MPI Rank 0: 05/03/2016 15:52:23: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3134s; samplesPerSecond = 2042.3 -MPI Rank 0: 05/03/2016 15:52:24: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3133s; samplesPerSecond = 2042.8 -MPI Rank 0: 05/03/2016 15:52:24: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3022s; samplesPerSecond = 2118.0 -MPI Rank 0: 05/03/2016 15:52:24: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3233s; samplesPerSecond = 1979.4 -MPI Rank 0: 05/03/2016 15:52:25: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3124s; samplesPerSecond = 2048.5 -MPI Rank 0: 05/03/2016 15:52:25: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.2930s; samplesPerSecond = 2184.5 -MPI Rank 0: 05/03/2016 15:52:25: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3143s; samplesPerSecond = 2036.4 -MPI Rank 0: 05/03/2016 15:52:26: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3033s; samplesPerSecond = 2110.4 -MPI Rank 0: 05/03/2016 15:52:26: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3329s; samplesPerSecond = 1922.4 -MPI Rank 0: 05/03/2016 15:52:26: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3021s; samplesPerSecond = 2118.7 -MPI Rank 0: 05/03/2016 15:52:26: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3237s; samplesPerSecond = 1976.9 -MPI Rank 0: 05/03/2016 15:52:27: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3135s; samplesPerSecond = 2041.3 -MPI Rank 0: 05/03/2016 15:52:27: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3142s; samplesPerSecond = 2036.8 -MPI Rank 0: 05/03/2016 15:52:27: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.2310s; samplesPerSecond = 2770.1 -MPI Rank 0: 05/03/2016 15:52:27: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.2077s -MPI Rank 0: 05/03/2016 15:52:27: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 14:46:07: Starting minibatch loop. 
+MPI Rank 0: 05/03/2016 14:46:08: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3100s; samplesPerSecond = 2064.6 +MPI Rank 0: 05/03/2016 14:46:08: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3145s; samplesPerSecond = 2034.9 +MPI Rank 0: 05/03/2016 14:46:08: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3132s; samplesPerSecond = 2043.4 +MPI Rank 0: 05/03/2016 14:46:09: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3167s; samplesPerSecond = 2020.9 +MPI Rank 0: 05/03/2016 14:46:09: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3093s; samplesPerSecond = 2069.4 +MPI Rank 0: 05/03/2016 14:46:09: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3139s; samplesPerSecond = 2039.0 +MPI Rank 0: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3127s; samplesPerSecond = 2046.6 +MPI Rank 0: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3147s; samplesPerSecond = 2033.9 +MPI Rank 0: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3125s; samplesPerSecond = 2047.7 +MPI Rank 0: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3122s; samplesPerSecond = 2050.1 +MPI Rank 0: 05/03/2016 14:46:11: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3145s; samplesPerSecond = 2035.0 +MPI Rank 0: 05/03/2016 14:46:11: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3140s; samplesPerSecond = 2038.4 +MPI Rank 0: 05/03/2016 14:46:11: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3128s; samplesPerSecond = 2045.9 +MPI Rank 0: 05/03/2016 14:46:12: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3145s; samplesPerSecond = 2034.9 +MPI Rank 0: 05/03/2016 14:46:12: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3128s; samplesPerSecond = 2046.3 +MPI Rank 0: 05/03/2016 14:46:12: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3126s; samplesPerSecond = 2047.4 +MPI Rank 0: 05/03/2016 14:46:13: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3149s; samplesPerSecond = 2032.2 +MPI Rank 0: 05/03/2016 14:46:13: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3131s; samplesPerSecond = 2044.3 +MPI Rank 0: 05/03/2016 14:46:13: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3126s; samplesPerSecond = 2047.6 +MPI Rank 0: 05/03/2016 14:46:14: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3146s; samplesPerSecond = 2034.1 +MPI Rank 0: 05/03/2016 14:46:14: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3128s; samplesPerSecond = 2046.3 +MPI Rank 0: 05/03/2016 14:46:14: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3129s; samplesPerSecond = 2045.3 +MPI Rank 0: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3137s; samplesPerSecond = 2040.4 +MPI Rank 0: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3130s; samplesPerSecond = 2044.9 +MPI Rank 0: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3167s; samplesPerSecond = 2020.8 +MPI Rank 0: 05/03/2016 14:46:16: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3399s; samplesPerSecond = 1883.2 +MPI Rank 0: 05/03/2016 14:46:16: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.2923s; samplesPerSecond = 2189.7 +MPI Rank 0: 05/03/2016 14:46:16: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3143s; samplesPerSecond = 2036.5 +MPI Rank 0: 05/03/2016 14:46:16: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3141s; samplesPerSecond = 2037.9 +MPI Rank 0: 05/03/2016 14:46:17: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3126s; samplesPerSecond = 2047.5 +MPI Rank 0: 05/03/2016 14:46:17: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3122s; samplesPerSecond = 2050.0 +MPI Rank 0: 05/03/2016 14:46:17: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3124s; samplesPerSecond = 2048.9 +MPI Rank 0: 05/03/2016 14:46:17: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.0448s +MPI Rank 0: 05/03/2016 14:46:17: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:27: Starting Epoch 2: learning rate per sample = 0.001953 effective 
momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 14:46:17: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:27: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 0: 05/03/2016 15:52:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13103792 * 2560; EvalErrorPrediction = 0.57265625 * 2560; time = 0.5412s; samplesPerSecond = 4730.5 -MPI Rank 0: 05/03/2016 15:52:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02964341 * 2560; EvalErrorPrediction = 0.55429688 * 2560; time = 0.5061s; samplesPerSecond = 5058.2 -MPI Rank 0: 05/03/2016 15:52:29: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.01756230 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.5079s; samplesPerSecond = 5040.8 -MPI Rank 0: 05/03/2016 15:52:30: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93600349 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.5106s; samplesPerSecond = 5013.8 -MPI Rank 0: 05/03/2016 15:52:30: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.93223172 * 2560; EvalErrorPrediction = 0.53476563 * 2560; time = 0.5058s; samplesPerSecond = 5061.4 -MPI Rank 0: 05/03/2016 15:52:31: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94164409 * 2560; EvalErrorPrediction = 0.54687500 * 2560; time = 0.5121s; samplesPerSecond = 4999.3 -MPI Rank 0: 05/03/2016 15:52:31: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.95564473 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.5099s; samplesPerSecond = 5020.5 -MPI Rank 0: 05/03/2016 15:52:32: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.97189291 * 2560; EvalErrorPrediction = 0.54218750 * 2560; time = 0.5122s; samplesPerSecond = 4998.1 -MPI Rank 0: 05/03/2016 15:52:32: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98945757 * 20480; EvalErrorPrediction = 0.54882813 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.12533s -MPI Rank 0: 05/03/2016 15:52:32: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn.2' +MPI Rank 0: 05/03/2016 14:46:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 0: 05/03/2016 14:46:18: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13103792 * 2560; EvalErrorPrediction = 0.57265625 * 2560; time = 0.5387s; samplesPerSecond = 4751.8 +MPI Rank 0: 05/03/2016 14:46:19: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02964341 * 2560; EvalErrorPrediction = 0.55429688 * 2560; time = 0.5114s; samplesPerSecond = 5005.9 +MPI Rank 0: 05/03/2016 14:46:19: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.01756230 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.5110s; samplesPerSecond = 5009.6 +MPI Rank 0: 05/03/2016 14:46:20: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93600349 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.5171s; samplesPerSecond = 4950.9 +MPI Rank 0: 05/03/2016 14:46:20: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.93223172 * 2560; EvalErrorPrediction = 0.53476563 * 2560; time = 0.5113s; samplesPerSecond = 5006.7 +MPI Rank 0: 05/03/2016 14:46:21: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94164409 * 2560; EvalErrorPrediction = 0.54687500 * 2560; time = 0.5173s; samplesPerSecond = 4948.9 +MPI Rank 0: 05/03/2016 14:46:21: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.95564473 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.5139s; samplesPerSecond = 4981.3 +MPI Rank 0: 05/03/2016 14:46:22: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.97189291 * 2560; EvalErrorPrediction = 0.54218750 * 2560; time = 0.5170s; samplesPerSecond = 4951.7 +MPI Rank 0: 05/03/2016 14:46:22: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98945757 * 20480; EvalErrorPrediction = 0.54882813 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.15581s +MPI Rank 0: 05/03/2016 14:46:22: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:32: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:46:22: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 15:52:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90740570 * 10240; EvalErrorPrediction = 0.52207031 * 10240; time = 1.5579s; samplesPerSecond = 6572.9 -MPI Rank 0: 05/03/2016 15:52:35: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90162239 * 10240; EvalErrorPrediction = 0.52011719 * 10240; time = 1.5282s; samplesPerSecond = 6700.8 -MPI Rank 0: 05/03/2016 15:52:35: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90451405 * 20480; EvalErrorPrediction = 0.52109375 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.10658s -MPI Rank 0: 05/03/2016 15:52:35: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 15:52:35: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 14:46:22: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 14:46:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90740570 * 10240; EvalErrorPrediction = 0.52207031 * 10240; time = 1.5695s; samplesPerSecond = 6524.5 +MPI Rank 0: 05/03/2016 14:46:25: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90162239 * 10240; EvalErrorPrediction = 0.52011719 * 10240; time = 1.5454s; samplesPerSecond = 6625.9 +MPI Rank 0: 05/03/2016 14:46:25: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90451405 * 20480; EvalErrorPrediction = 0.52109375 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.1321s +MPI Rank 0: 05/03/2016 14:46:25: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 14:46:25: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:35: Action "train" complete. +MPI Rank 0: 05/03/2016 14:46:25: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:35: __COMPLETED__ -MPI Rank 1: 05/03/2016 15:52:11: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank1 -MPI Rank 1: 05/03/2016 15:52:12: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 15:52:12: Build info: +MPI Rank 0: 05/03/2016 14:46:25: __COMPLETED__ +MPI Rank 1: 05/03/2016 14:46:01: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank1 +MPI Rank 1: 05/03/2016 14:46:01: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:46:01: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:12: Built time: May 3 2016 13:15:46 -MPI Rank 1: 05/03/2016 15:52:12: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 1: 05/03/2016 15:52:12: Build type: Release -MPI Rank 1: 05/03/2016 15:52:12: Build target: GPU -MPI Rank 1: 05/03/2016 15:52:12: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 15:52:12: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 05/03/2016 15:52:12: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 1: 05/03/2016 15:52:12: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 05/03/2016 15:52:12: Build Branch: HEAD -MPI Rank 1: 05/03/2016 15:52:12: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 1: 05/03/2016 15:52:12: Built by svcphil on cntk-muc01 -MPI Rank 1: 05/03/2016 15:52:12: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 05/03/2016 15:52:12: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:46:01: Built time: May 3 2016 13:15:46 +MPI Rank 1: 05/03/2016 14:46:01: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 1: 05/03/2016 14:46:01: Build type: Release +MPI Rank 1: 05/03/2016 14:46:01: Build target: GPU +MPI Rank 1: 05/03/2016 14:46:01: With 1bit-SGD: no +MPI Rank 1: 05/03/2016 14:46:01: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 1: 05/03/2016 14:46:01: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 1: 05/03/2016 14:46:01: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 1: 05/03/2016 14:46:01: Build Branch: HEAD +MPI Rank 1: 05/03/2016 14:46:01: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 1: 05/03/2016 14:46:01: Built by svcphil on cntk-muc01 +MPI Rank 1: 05/03/2016 14:46:01: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 1: 05/03/2016 14:46:01: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:12: Running on cntk-muc01 at 2016/05/03 15:52:12 -MPI Rank 1: 05/03/2016 15:52:12: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 1: 05/03/2016 14:46:01: Running on cntk-muc01 at 2016/05/03 14:46:01 +MPI Rank 1: 05/03/2016 14:46:01: Command line: +MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 15:52:12: precision = "float" +MPI Rank 1: 05/03/2016 14:46:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:46:01: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -741,30 +741,28 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: 
stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:46:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 15:52:12: precision = "float" +MPI Rank 1: 05/03/2016 14:46:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:46:01: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = 0 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -848,36 +846,34 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:46:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:12: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:46:01: 
>>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:deviceId=0 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -959,35 +955,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 15:52:12: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 15:52:12: Commands: speechTrain -MPI Rank 1: 05/03/2016 15:52:12: Precision = "double" -MPI Rank 1: 05/03/2016 15:52:12: Using 2 CPU threads. 
-MPI Rank 1: 05/03/2016 15:52:12: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 15:52:12: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 1: 05/03/2016 15:52:12: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 1: 05/03/2016 14:46:01: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:46:01: Commands: speechTrain +MPI Rank 1: 05/03/2016 14:46:01: Precision = "double" +MPI Rank 1: 05/03/2016 14:46:01: Using 2 CPU threads. +MPI Rank 1: 05/03/2016 14:46:01: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 14:46:01: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 1: 05/03/2016 14:46:01: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:12: ############################################################################## -MPI Rank 1: 05/03/2016 15:52:12: # # -MPI Rank 1: 05/03/2016 15:52:12: # Action "train" # -MPI Rank 1: 05/03/2016 15:52:12: # # -MPI Rank 1: 05/03/2016 15:52:12: ############################################################################## +MPI Rank 1: 05/03/2016 14:46:01: ############################################################################## +MPI Rank 1: 05/03/2016 14:46:01: # # +MPI Rank 1: 05/03/2016 14:46:01: # Action "train" # +MPI Rank 1: 05/03/2016 14:46:01: # # +MPI Rank 1: 05/03/2016 14:46:01: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:12: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 14:46:01: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:12: Creating virgin network. +MPI Rank 1: 05/03/2016 14:46:02: Creating virgin network. MPI Rank 1: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 1: MPI Rank 1: Post-processing network... 
@@ -1040,14 +1035,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:13: Created model with 25 nodes on GPU 0. +MPI Rank 1: 05/03/2016 14:46:02: Created model with 25 nodes on GPU 0. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:13: Training criterion node(s): -MPI Rank 1: 05/03/2016 15:52:13: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 14:46:02: Training criterion node(s): +MPI Rank 1: 05/03/2016 14:46:02: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:13: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 14:46:02: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:13: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 14:46:02: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1055,132 +1050,137 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 0000006A3073CD70: {[features Value[363 x *]] } -MPI Rank 1: 0000006A4F9C22F0: {[W0 Value[512 x 363]] } -MPI Rank 1: 0000006A4F9C2610: {[W1 Value[512 x 512]] } -MPI Rank 1: 0000006A4F9C27F0: {[B1 Value[512 x 1]] } -MPI Rank 1: 0000006A4F9C2ED0: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 0000006A4F9C3510: {[B0 Value[512 x 1]] } -MPI Rank 1: 0000006A4F9C3D30: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 0000006A524D8B00: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 0000006A524D8BA0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 0000006A524D8D80: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 0000006A524D8E20: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 0000006A524D8F60: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 0000006A524D9000: {[Prior Value[132]] } -MPI Rank 1: 0000006A524D90A0: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 0000006A524D9280: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 0000006A524D9500: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 0000006A524D95A0: {[W0*features Value[512 x *]] } -MPI Rank 1: 0000006A524D9640: {[LogOfPrior Value[132]] } -MPI Rank 1: 0000006A524D96E0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 0000006A524D98C0: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 0000006A524D9960: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 0000006A524D9C80: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 0000006A524D9E60: {[labels Value[132 x *]] } -MPI Rank 1: 0000006A524DA040: {[B2 Value[132 x 1]] } -MPI Rank 1: 0000006A524DA0E0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 0000006A524DA2C0: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 0000006A524DA360: {[W2 Value[132 x 512]] } -MPI Rank 1: 0000006A524DA4A0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 
0000000508E43E00: {[features Value[363 x *]] } +MPI Rank 1: 00000005271A1430: {[B0 Value[512 x 1]] } +MPI Rank 1: 00000005271A14D0: {[W1 Value[512 x 512]] } +MPI Rank 1: 00000005271A1B10: {[B1 Value[512 x 1]] } +MPI Rank 1: 00000005271A1ED0: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 00000005271A21F0: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 00000005271A2470: {[W0 Value[512 x 363]] } +MPI Rank 1: 000000052A08F5D0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 000000052A08F670: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 000000052A08FB70: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 000000052A08FC10: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 000000052A08FCB0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 000000052A08FF30: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 000000052A08FFD0: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 000000052A090070: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 000000052A090250: {[Prior Value[132]] } +MPI Rank 1: 000000052A090390: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 000000052A090430: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 000000052A0904D0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 000000052A090570: {[labels Value[132 x *]] } +MPI Rank 1: 000000052A090610: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 000000052A090890: {[B2 Value[132 x 1]] } +MPI Rank 1: 000000052A090A70: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 000000052A090B10: {[W2 Value[132 x 512]] } +MPI Rank 1: 000000052A090C50: {[W0*features Value[512 x *]] } +MPI Rank 1: 000000052A090CF0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 000000052A090D90: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 000000052A091010: {[LogOfPrior Value[132]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:13: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 14:46:02: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:13: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 15:52:13: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 15:52:13: Prior = Mean() +MPI Rank 1: 05/03/2016 14:46:02: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 14:46:02: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 14:46:02: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:17: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 14:46:07: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 14:46:07: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:17: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 15:52:18: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.4711s; samplesPerSecond = 1358.6 -MPI Rank 1: 05/03/2016 15:52:18: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3027s; samplesPerSecond = 2114.4 -MPI Rank 1: 05/03/2016 15:52:18: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3337s; samplesPerSecond = 1918.2 -MPI Rank 1: 05/03/2016 15:52:19: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3135s; samplesPerSecond = 2041.6 -MPI Rank 1: 05/03/2016 15:52:19: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3138s; samplesPerSecond = 2039.6 -MPI Rank 1: 05/03/2016 15:52:19: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3136s; samplesPerSecond = 2040.6 -MPI Rank 1: 05/03/2016 15:52:20: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3128s; samplesPerSecond = 2046.0 -MPI Rank 1: 05/03/2016 15:52:20: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3135s; samplesPerSecond = 2041.3 -MPI Rank 1: 05/03/2016 15:52:20: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3137s; samplesPerSecond = 2040.1 -MPI Rank 1: 05/03/2016 15:52:21: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3238s; samplesPerSecond = 1976.7 -MPI Rank 1: 05/03/2016 15:52:21: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3331s; samplesPerSecond = 1921.6 -MPI Rank 1: 05/03/2016 15:52:21: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3138s; samplesPerSecond = 2039.6 -MPI Rank 1: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3127s; samplesPerSecond = 2046.5 -MPI Rank 1: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3340s; samplesPerSecond = 1916.0 -MPI Rank 1: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3124s; samplesPerSecond = 2048.6 -MPI Rank 1: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3132s; samplesPerSecond = 2043.6 -MPI Rank 1: 05/03/2016 15:52:23: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3233s; samplesPerSecond = 1979.6 -MPI Rank 1: 05/03/2016 15:52:23: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3130s; samplesPerSecond = 2044.5 -MPI Rank 1: 05/03/2016 15:52:23: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3130s; samplesPerSecond = 2044.5 -MPI Rank 1: 05/03/2016 15:52:24: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3135s; samplesPerSecond = 2041.6 -MPI Rank 1: 05/03/2016 15:52:24: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.2913s; samplesPerSecond = 2197.1 -MPI Rank 1: 05/03/2016 15:52:24: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3338s; samplesPerSecond = 1917.2 -MPI Rank 1: 05/03/2016 15:52:25: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3129s; samplesPerSecond = 2045.1 -MPI Rank 1: 05/03/2016 15:52:25: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3240s; samplesPerSecond = 1975.2 -MPI Rank 1: 05/03/2016 15:52:25: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3143s; samplesPerSecond = 2036.6 -MPI Rank 1: 05/03/2016 15:52:26: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3235s; samplesPerSecond = 1978.4 -MPI Rank 1: 05/03/2016 15:52:26: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3126s; samplesPerSecond = 2047.3 -MPI Rank 1: 05/03/2016 15:52:26: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3224s; samplesPerSecond = 1984.9 -MPI Rank 1: 05/03/2016 15:52:27: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3348s; samplesPerSecond = 1911.7 -MPI Rank 1: 05/03/2016 15:52:27: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3137s; samplesPerSecond = 2040.4 -MPI Rank 1: 05/03/2016 15:52:27: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.2720s; samplesPerSecond = 2352.9 -MPI Rank 1: 05/03/2016 15:52:27: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.1800s; samplesPerSecond = 3555.3 -MPI Rank 1: 05/03/2016 15:52:27: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.2404s +MPI Rank 1: 05/03/2016 14:46:07: Starting minibatch loop. 
+MPI Rank 1: 05/03/2016 14:46:08: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3203s; samplesPerSecond = 1998.1 +MPI Rank 1: 05/03/2016 14:46:08: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3149s; samplesPerSecond = 2032.6 +MPI Rank 1: 05/03/2016 14:46:08: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3171s; samplesPerSecond = 2018.4 +MPI Rank 1: 05/03/2016 14:46:09: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3090s; samplesPerSecond = 2071.1 +MPI Rank 1: 05/03/2016 14:46:09: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3127s; samplesPerSecond = 2046.6 +MPI Rank 1: 05/03/2016 14:46:09: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3143s; samplesPerSecond = 2036.2 +MPI Rank 1: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3128s; samplesPerSecond = 2046.2 +MPI Rank 1: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3146s; samplesPerSecond = 2034.5 +MPI Rank 1: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3127s; samplesPerSecond = 2046.6 +MPI Rank 1: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3121s; samplesPerSecond = 2050.6 +MPI Rank 1: 05/03/2016 14:46:11: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3141s; samplesPerSecond = 2037.4 +MPI Rank 1: 05/03/2016 14:46:11: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3143s; samplesPerSecond = 2036.3 +MPI Rank 1: 05/03/2016 14:46:11: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3129s; samplesPerSecond = 2045.1 +MPI Rank 1: 05/03/2016 14:46:12: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3145s; samplesPerSecond = 2035.1 +MPI Rank 1: 05/03/2016 14:46:12: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3166s; samplesPerSecond = 2021.7 +MPI Rank 1: 05/03/2016 14:46:12: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3127s; samplesPerSecond = 2047.0 +MPI Rank 1: 05/03/2016 14:46:13: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3110s; samplesPerSecond = 2057.6 +MPI Rank 1: 05/03/2016 14:46:13: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3133s; samplesPerSecond = 2042.5 +MPI Rank 1: 05/03/2016 14:46:13: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3162s; samplesPerSecond = 2024.0 +MPI Rank 1: 05/03/2016 14:46:14: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3108s; samplesPerSecond = 2059.0 +MPI Rank 1: 05/03/2016 14:46:14: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3129s; samplesPerSecond = 2045.2 +MPI Rank 1: 05/03/2016 14:46:14: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3128s; samplesPerSecond = 2045.8 +MPI Rank 1: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3136s; samplesPerSecond = 2040.8 +MPI Rank 1: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3130s; samplesPerSecond = 2044.7 +MPI Rank 1: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3232s; samplesPerSecond = 1980.1 +MPI Rank 1: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.2907s; samplesPerSecond = 2201.4 +MPI Rank 1: 05/03/2016 14:46:16: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3172s; samplesPerSecond = 2017.7 +MPI Rank 1: 05/03/2016 14:46:16: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3104s; samplesPerSecond = 2061.9 +MPI Rank 1: 05/03/2016 14:46:16: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3139s; samplesPerSecond = 2039.0 +MPI Rank 1: 05/03/2016 14:46:17: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3128s; samplesPerSecond = 2046.2 +MPI Rank 1: 05/03/2016 14:46:17: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3123s; samplesPerSecond = 2049.3 +MPI Rank 1: 05/03/2016 14:46:17: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3162s; samplesPerSecond = 2023.9 +MPI Rank 1: 05/03/2016 14:46:17: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.038s MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:27: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 14:46:17: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:27: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 15:52:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13103792 * 2560; EvalErrorPrediction = 0.57265625 * 2560; time = 0.5415s; samplesPerSecond = 4727.8 -MPI Rank 1: 05/03/2016 15:52:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02964341 * 2560; EvalErrorPrediction = 0.55429688 * 2560; time = 0.5065s; samplesPerSecond = 5054.2 -MPI Rank 1: 05/03/2016 15:52:29: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.01756230 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.5079s; samplesPerSecond = 5040.8 -MPI Rank 1: 05/03/2016 15:52:30: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93600349 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.5102s; samplesPerSecond = 5017.6 -MPI Rank 1: 05/03/2016 15:52:30: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.93223172 * 2560; EvalErrorPrediction = 0.53476563 * 2560; time = 0.5058s; samplesPerSecond = 5061.4 -MPI Rank 1: 05/03/2016 15:52:31: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94164409 * 2560; EvalErrorPrediction = 0.54687500 * 2560; time = 0.5125s; samplesPerSecond = 4995.5 -MPI Rank 1: 05/03/2016 15:52:31: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.95564473 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.5095s; samplesPerSecond = 5024.2 -MPI Rank 1: 05/03/2016 15:52:32: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.97189291 * 2560; EvalErrorPrediction = 0.54218750 * 2560; time = 0.5126s; samplesPerSecond = 4993.8 -MPI Rank 1: 05/03/2016 15:52:32: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98945757 * 20480; EvalErrorPrediction = 0.54882813 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.12598s +MPI Rank 1: 05/03/2016 14:46:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 14:46:18: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13103792 * 2560; EvalErrorPrediction = 0.57265625 * 2560; time = 0.5391s; samplesPerSecond = 4748.6 +MPI Rank 1: 05/03/2016 14:46:19: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02964341 * 2560; EvalErrorPrediction = 0.55429688 * 2560; time = 0.5114s; samplesPerSecond = 5005.7 +MPI Rank 1: 05/03/2016 14:46:19: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.01756230 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.5110s; samplesPerSecond = 5009.7 +MPI Rank 1: 05/03/2016 14:46:20: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93600349 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.5171s; samplesPerSecond = 4950.4 +MPI Rank 1: 05/03/2016 14:46:20: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.93223172 * 2560; EvalErrorPrediction = 0.53476563 * 2560; time = 0.5114s; samplesPerSecond = 5005.8 +MPI Rank 1: 05/03/2016 14:46:21: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94164409 * 2560; EvalErrorPrediction = 0.54687500 * 2560; time = 0.5173s; samplesPerSecond = 4948.8 +MPI Rank 1: 05/03/2016 14:46:21: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.95564473 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.5139s; samplesPerSecond = 4981.3 +MPI Rank 1: 05/03/2016 14:46:22: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.97189291 * 2560; EvalErrorPrediction = 0.54218750 * 2560; time = 0.5171s; samplesPerSecond = 4951.1 +MPI Rank 1: 05/03/2016 14:46:22: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98945757 * 20480; EvalErrorPrediction = 0.54882813 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.15646s MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:32: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 14:46:22: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 15:52:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90740570 * 10240; EvalErrorPrediction = 0.52207031 * 10240; time = 1.5615s; samplesPerSecond = 6557.8 -MPI Rank 1: 05/03/2016 15:52:35: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90162239 * 10240; EvalErrorPrediction = 0.52011719 * 10240; time = 1.5278s; samplesPerSecond = 6702.5 -MPI Rank 1: 05/03/2016 15:52:35: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90451405 * 20480; EvalErrorPrediction = 0.52109375 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.10683s -MPI Rank 1: 05/03/2016 15:52:35: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 14:46:22: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 14:46:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90740570 * 10240; EvalErrorPrediction = 0.52207031 * 10240; time = 1.5710s; samplesPerSecond = 6518.2 +MPI Rank 1: 05/03/2016 14:46:25: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90162239 * 10240; EvalErrorPrediction = 0.52011719 * 10240; time = 1.5445s; samplesPerSecond = 6629.9 +MPI Rank 1: 05/03/2016 14:46:25: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90451405 * 20480; EvalErrorPrediction = 0.52109375 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.13248s +MPI Rank 1: 05/03/2016 14:46:25: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:35: Action "train" complete. +MPI Rank 1: 05/03/2016 14:46:25: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:35: __COMPLETED__ -MPI Rank 2: 05/03/2016 15:52:12: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank2 -MPI Rank 2: 05/03/2016 15:52:12: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 15:52:12: Build info: +MPI Rank 1: 05/03/2016 14:46:25: __COMPLETED__ +MPI Rank 2: 05/03/2016 14:46:02: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank2 +MPI Rank 2: 05/03/2016 14:46:02: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:46:02: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:12: Built time: May 3 2016 13:15:46 -MPI Rank 2: 05/03/2016 15:52:12: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 2: 05/03/2016 15:52:12: Build type: Release -MPI Rank 2: 05/03/2016 15:52:12: Build target: GPU -MPI Rank 2: 05/03/2016 15:52:12: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 15:52:12: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 05/03/2016 15:52:12: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 2: 05/03/2016 15:52:12: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 05/03/2016 15:52:12: Build Branch: HEAD -MPI Rank 2: 05/03/2016 15:52:12: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 2: 05/03/2016 15:52:12: Built by svcphil on cntk-muc01 -MPI Rank 2: 05/03/2016 15:52:12: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 05/03/2016 15:52:12: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:46:02: Built time: May 3 2016 13:15:46 +MPI Rank 2: 05/03/2016 14:46:02: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 2: 05/03/2016 14:46:02: Build type: Release +MPI Rank 2: 05/03/2016 14:46:02: Build target: GPU +MPI Rank 2: 05/03/2016 14:46:02: With 1bit-SGD: no +MPI Rank 2: 05/03/2016 14:46:02: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 2: 05/03/2016 14:46:02: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 2: 05/03/2016 14:46:02: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 2: 05/03/2016 14:46:02: Build Branch: HEAD +MPI Rank 2: 05/03/2016 14:46:02: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 2: 05/03/2016 14:46:02: Built by svcphil on cntk-muc01 +MPI Rank 2: 05/03/2016 14:46:02: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 2: 05/03/2016 14:46:02: 
------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:12: Running on cntk-muc01 at 2016/05/03 15:52:12 -MPI Rank 2: 05/03/2016 15:52:12: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 2: 05/03/2016 14:46:02: Running on cntk-muc01 at 2016/05/03 14:46:02 +MPI Rank 2: 05/03/2016 14:46:02: Command line: +MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 15:52:12: precision = "float" +MPI Rank 2: 05/03/2016 14:46:02: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:46:02: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1270,30 +1270,28 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: 
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:46:02: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 15:52:12: precision = "float" +MPI Rank 2: 05/03/2016 14:46:02: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:46:02: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = 0 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1377,36 +1375,34 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] MPI Rank 2: 
speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:46:02: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:12: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:46:02: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:deviceId=0 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1488,35 +1484,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] +MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/stderr +MPI Rank 2: configparameters: 
cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 15:52:12: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 15:52:12: Commands: speechTrain -MPI Rank 2: 05/03/2016 15:52:12: Precision = "double" -MPI Rank 2: 05/03/2016 15:52:12: Using 2 CPU threads. -MPI Rank 2: 05/03/2016 15:52:12: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 15:52:12: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 2: 05/03/2016 15:52:12: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 2: 05/03/2016 14:46:02: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:46:02: Commands: speechTrain +MPI Rank 2: 05/03/2016 14:46:02: Precision = "double" +MPI Rank 2: 05/03/2016 14:46:02: Using 2 CPU threads. +MPI Rank 2: 05/03/2016 14:46:02: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 14:46:02: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 2: 05/03/2016 14:46:02: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:12: ############################################################################## -MPI Rank 2: 05/03/2016 15:52:12: # # -MPI Rank 2: 05/03/2016 15:52:12: # Action "train" # -MPI Rank 2: 05/03/2016 15:52:12: # # -MPI Rank 2: 05/03/2016 15:52:12: ############################################################################## +MPI Rank 2: 05/03/2016 14:46:02: ############################################################################## +MPI Rank 2: 05/03/2016 14:46:02: # # +MPI Rank 2: 05/03/2016 14:46:02: # Action "train" # +MPI Rank 2: 05/03/2016 14:46:02: # # +MPI Rank 2: 05/03/2016 14:46:02: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:12: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 14:46:02: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. 
chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:12: Creating virgin network. +MPI Rank 2: 05/03/2016 14:46:02: Creating virgin network. MPI Rank 2: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 2: MPI Rank 2: Post-processing network... @@ -1569,14 +1564,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:13: Created model with 25 nodes on GPU 0. +MPI Rank 2: 05/03/2016 14:46:03: Created model with 25 nodes on GPU 0. MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:13: Training criterion node(s): -MPI Rank 2: 05/03/2016 15:52:13: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 14:46:03: Training criterion node(s): +MPI Rank 2: 05/03/2016 14:46:03: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:13: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 14:46:03: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:13: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 14:46:03: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1584,103 +1579,108 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 000000CA24DD9A70: {[B1 Value[512 x 1]] } -MPI Rank 2: 000000CA24DD9ED0: {[W1 Value[512 x 512]] } -MPI Rank 2: 000000CA24DDA650: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 000000CA24DDA830: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 000000CA24DDAD30: {[W0 Value[512 x 363]] } -MPI Rank 2: 000000CA24DDB2D0: {[B0 Value[512 x 1]] } -MPI Rank 2: 000000CA279FA3F0: {[labels Value[132 x *]] } -MPI Rank 2: 000000CA279FA490: {[W2 Value[132 x 512]] } -MPI Rank 2: 000000CA279FA670: {[Prior Value[132]] } -MPI Rank 2: 000000CA279FA7B0: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 000000CA279FA850: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 000000CA279FAA30: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 000000CA279FAAD0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 000000CA279FACB0: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 000000CA279FADF0: {[W0*features Value[512 x *]] } -MPI Rank 2: 000000CA279FAE90: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 000000CA279FAF30: {[B2 Value[132 x 1]] } -MPI Rank 2: 000000CA279FAFD0: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 000000CA279FB1B0: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 000000CA279FB250: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 000000CA279FB610: {[LogOfPrior Value[132]] } -MPI Rank 2: 000000CA279FB930: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 000000CA279FBA70: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 000000CA279FBC50: {[CrossEntropyWithSoftmax Value[1]] } 
-MPI Rank 2: 000000CA279FBD90: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 000000CA279FBF70: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 000000CA279FC010: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 000000CA7F95DC90: {[features Value[363 x *]] } +MPI Rank 2: 0000009A86000430: {[features Value[363 x *]] } +MPI Rank 2: 0000009AA4528880: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 0000009AA4528BA0: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 0000009AA4528E20: {[W0 Value[512 x 363]] } +MPI Rank 2: 0000009AA4529BE0: {[B0 Value[512 x 1]] } +MPI Rank 2: 0000009AA4529D20: {[W1 Value[512 x 512]] } +MPI Rank 2: 0000009AA452A040: {[B1 Value[512 x 1]] } +MPI Rank 2: 0000009AA71453C0: {[LogOfPrior Value[132]] } +MPI Rank 2: 0000009AA7145460: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 0000009AA7145500: {[W0*features Value[512 x *]] } +MPI Rank 2: 0000009AA7145820: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 0000009AA7145A00: {[W2 Value[132 x 512]] } +MPI Rank 2: 0000009AA7145C80: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 0000009AA7145D20: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 0000009AA7146040: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 0000009AA71460E0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 0000009AA7146180: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 0000009AA7146220: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 0000009AA7146360: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 0000009AA7146680: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 0000009AA7146860: {[Prior Value[132]] } +MPI Rank 2: 0000009AA7146900: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 0000009AA71469A0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 0000009AA7146B80: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 0000009AA7146C20: {[B2 Value[132 x 1]] } +MPI Rank 2: 0000009AA7146CC0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 0000009AA7146D60: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 0000009AA7146F40: {[labels Value[132 x *]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:13: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 14:46:03: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:13: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 15:52:13: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 15:52:13: Prior = Mean() +MPI Rank 2: 05/03/2016 14:46:03: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 14:46:03: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 14:46:03: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:17: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 14:46:07: Precomputing --> Completed. 
MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 14:46:07: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:17: Starting minibatch loop. -MPI Rank 2: 05/03/2016 15:52:18: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3245s; samplesPerSecond = 1972.4 -MPI Rank 2: 05/03/2016 15:52:18: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3237s; samplesPerSecond = 1977.2 -MPI Rank 2: 05/03/2016 15:52:18: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3027s; samplesPerSecond = 2114.3 -MPI Rank 2: 05/03/2016 15:52:18: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3129s; samplesPerSecond = 2045.1 -MPI Rank 2: 05/03/2016 15:52:19: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3139s; samplesPerSecond = 2039.0 -MPI Rank 2: 05/03/2016 15:52:19: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3139s; samplesPerSecond = 2039.0 -MPI Rank 2: 05/03/2016 15:52:19: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3129s; samplesPerSecond = 2045.3 -MPI Rank 2: 05/03/2016 15:52:20: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3133s; samplesPerSecond = 2042.8 -MPI Rank 2: 05/03/2016 15:52:20: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3132s; samplesPerSecond = 2043.4 -MPI Rank 2: 05/03/2016 15:52:20: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3030s; samplesPerSecond = 2112.4 -MPI Rank 2: 05/03/2016 15:52:21: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.2919s; samplesPerSecond = 2192.9 -MPI Rank 2: 05/03/2016 15:52:21: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3129s; samplesPerSecond = 2045.5 -MPI Rank 2: 05/03/2016 15:52:21: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3141s; samplesPerSecond = 2037.4 -MPI Rank 2: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3016s; samplesPerSecond = 2122.1 -MPI Rank 2: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; 
time = 0.3138s; samplesPerSecond = 2039.6 -MPI Rank 2: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3122s; samplesPerSecond = 2049.9 -MPI Rank 2: 05/03/2016 15:52:22: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3028s; samplesPerSecond = 2113.4 -MPI Rank 2: 05/03/2016 15:52:23: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3022s; samplesPerSecond = 2118.0 -MPI Rank 2: 05/03/2016 15:52:23: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3133s; samplesPerSecond = 2042.6 -MPI Rank 2: 05/03/2016 15:52:23: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3096s; samplesPerSecond = 2067.0 -MPI Rank 2: 05/03/2016 15:52:24: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3137s; samplesPerSecond = 2040.0 -MPI Rank 2: 05/03/2016 15:52:24: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3121s; samplesPerSecond = 2050.8 -MPI Rank 2: 05/03/2016 15:52:24: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3128s; samplesPerSecond = 2046.2 -MPI Rank 2: 05/03/2016 15:52:25: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3133s; samplesPerSecond = 2042.7 -MPI Rank 2: 05/03/2016 15:52:25: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3239s; samplesPerSecond = 1975.8 -MPI Rank 2: 05/03/2016 15:52:25: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3141s; samplesPerSecond = 2037.7 -MPI Rank 2: 05/03/2016 15:52:26: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.2922s; samplesPerSecond = 2190.1 -MPI Rank 2: 05/03/2016 15:52:26: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3126s; samplesPerSecond = 2047.7 -MPI Rank 2: 05/03/2016 15:52:26: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.2912s; samplesPerSecond = 2197.7 -MPI Rank 2: 05/03/2016 15:52:27: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3033s; samplesPerSecond = 2110.4 -MPI Rank 2: 05/03/2016 15:52:27: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3133s; samplesPerSecond = 2042.8 -MPI Rank 2: 05/03/2016 15:52:27: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3040s; samplesPerSecond = 2105.6 -MPI Rank 2: 05/03/2016 
15:52:27: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=9.99729s +MPI Rank 2: 05/03/2016 14:46:07: Starting minibatch loop. +MPI Rank 2: 05/03/2016 14:46:08: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3098s; samplesPerSecond = 2065.9 +MPI Rank 2: 05/03/2016 14:46:08: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3146s; samplesPerSecond = 2034.6 +MPI Rank 2: 05/03/2016 14:46:08: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3136s; samplesPerSecond = 2040.7 +MPI Rank 2: 05/03/2016 14:46:09: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3126s; samplesPerSecond = 2047.4 +MPI Rank 2: 05/03/2016 14:46:09: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3131s; samplesPerSecond = 2044.1 +MPI Rank 2: 05/03/2016 14:46:09: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3136s; samplesPerSecond = 2040.6 +MPI Rank 2: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3167s; samplesPerSecond = 2020.5 +MPI Rank 2: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3106s; samplesPerSecond = 2060.3 +MPI Rank 2: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3130s; samplesPerSecond = 2045.0 +MPI Rank 2: 05/03/2016 14:46:10: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3158s; samplesPerSecond = 2026.7 +MPI Rank 2: 05/03/2016 14:46:11: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3104s; samplesPerSecond = 2061.9 +MPI Rank 2: 05/03/2016 14:46:11: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3140s; samplesPerSecond = 2038.1 +MPI Rank 2: 05/03/2016 14:46:11: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3168s; samplesPerSecond = 2020.1 +MPI Rank 2: 05/03/2016 14:46:12: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3105s; samplesPerSecond = 2061.1 +MPI Rank 2: 05/03/2016 14:46:12: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3132s; samplesPerSecond = 2043.6 +MPI Rank 2: 05/03/2016 14:46:12: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3126s; samplesPerSecond = 2047.4 +MPI Rank 2: 05/03/2016 
14:46:13: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3145s; samplesPerSecond = 2034.9 +MPI Rank 2: 05/03/2016 14:46:13: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3135s; samplesPerSecond = 2041.6 +MPI Rank 2: 05/03/2016 14:46:13: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3126s; samplesPerSecond = 2047.6 +MPI Rank 2: 05/03/2016 14:46:14: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3142s; samplesPerSecond = 2036.7 +MPI Rank 2: 05/03/2016 14:46:14: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3168s; samplesPerSecond = 2020.4 +MPI Rank 2: 05/03/2016 14:46:14: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3129s; samplesPerSecond = 2045.3 +MPI Rank 2: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3097s; samplesPerSecond = 2066.8 +MPI Rank 2: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3131s; samplesPerSecond = 2044.0 +MPI Rank 2: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3126s; samplesPerSecond = 2047.3 +MPI Rank 2: 05/03/2016 14:46:15: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3124s; samplesPerSecond = 2048.9 +MPI Rank 2: 05/03/2016 14:46:16: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3128s; samplesPerSecond = 2046.2 +MPI Rank 2: 05/03/2016 14:46:16: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3144s; samplesPerSecond = 2035.6 +MPI Rank 2: 05/03/2016 14:46:16: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3139s; samplesPerSecond = 2038.8 +MPI Rank 2: 05/03/2016 14:46:17: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3124s; samplesPerSecond = 2048.7 +MPI Rank 2: 05/03/2016 14:46:17: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3124s; samplesPerSecond = 2048.9 +MPI Rank 2: 05/03/2016 14:46:17: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3126s; samplesPerSecond = 2047.4 +MPI Rank 2: 05/03/2016 14:46:17: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.0341s MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:27: Starting Epoch 
2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 14:46:17: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:27: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 15:52:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13103792 * 2560; EvalErrorPrediction = 0.57265625 * 2560; time = 0.5427s; samplesPerSecond = 4717.4 -MPI Rank 2: 05/03/2016 15:52:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02964341 * 2560; EvalErrorPrediction = 0.55429688 * 2560; time = 0.5066s; samplesPerSecond = 5053.0 -MPI Rank 2: 05/03/2016 15:52:29: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.01756230 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.5079s; samplesPerSecond = 5040.8 -MPI Rank 2: 05/03/2016 15:52:30: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93600349 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.5101s; samplesPerSecond = 5018.9 -MPI Rank 2: 05/03/2016 15:52:30: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.93223172 * 2560; EvalErrorPrediction = 0.53476563 * 2560; time = 0.5058s; samplesPerSecond = 5061.6 -MPI Rank 2: 05/03/2016 15:52:31: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94164409 * 2560; EvalErrorPrediction = 0.54687500 * 2560; time = 0.5126s; samplesPerSecond = 4994.3 -MPI Rank 2: 05/03/2016 15:52:31: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.95564473 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.5094s; samplesPerSecond = 5025.6 -MPI Rank 2: 05/03/2016 15:52:32: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.97189291 * 2560; EvalErrorPrediction = 0.54218750 * 2560; time = 0.5128s; samplesPerSecond = 4992.5 -MPI Rank 2: 05/03/2016 15:52:32: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98945757 * 20480; EvalErrorPrediction = 0.54882813 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.12562s +MPI Rank 2: 05/03/2016 14:46:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 14:46:18: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13103792 * 2560; EvalErrorPrediction = 0.57265625 * 2560; time = 0.5395s; samplesPerSecond = 4744.8 +MPI Rank 2: 05/03/2016 14:46:19: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02964341 * 2560; EvalErrorPrediction = 0.55429688 * 2560; time = 0.5114s; samplesPerSecond = 5005.8 +MPI Rank 2: 05/03/2016 14:46:19: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.01756230 * 2560; EvalErrorPrediction = 0.55507812 * 2560; time = 0.5110s; samplesPerSecond = 5009.7 +MPI Rank 2: 05/03/2016 14:46:20: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93600349 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.5171s; samplesPerSecond = 4950.5 +MPI Rank 2: 05/03/2016 14:46:20: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.93223172 * 2560; EvalErrorPrediction = 0.53476563 * 2560; time = 0.5112s; samplesPerSecond = 5007.7 +MPI Rank 2: 05/03/2016 14:46:21: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.94164409 * 2560; EvalErrorPrediction = 0.54687500 * 2560; time = 0.5176s; samplesPerSecond = 4945.8 +MPI Rank 2: 05/03/2016 14:46:21: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.95564473 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.5139s; samplesPerSecond = 4981.2 +MPI Rank 2: 05/03/2016 14:46:22: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.97189291 * 2560; EvalErrorPrediction = 0.54218750 * 2560; time = 0.5170s; samplesPerSecond = 4951.2 +MPI Rank 2: 05/03/2016 14:46:22: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.98945757 * 20480; EvalErrorPrediction = 0.54882813 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.1561s MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:32: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:46:22: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:32: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 15:52:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90740570 * 10240; EvalErrorPrediction = 0.52207031 * 10240; time = 1.5582s; samplesPerSecond = 6571.7 -MPI Rank 2: 05/03/2016 15:52:35: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90162239 * 10240; EvalErrorPrediction = 0.52011719 * 10240; time = 1.5277s; samplesPerSecond = 6703.1 -MPI Rank 2: 05/03/2016 15:52:35: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90451405 * 20480; EvalErrorPrediction = 0.52109375 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.10633s -MPI Rank 2: 05/03/2016 15:52:35: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 14:46:22: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 14:46:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90740570 * 10240; EvalErrorPrediction = 0.52207031 * 10240; time = 1.5707s; samplesPerSecond = 6519.3 +MPI Rank 2: 05/03/2016 14:46:25: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90162239 * 10240; EvalErrorPrediction = 0.52011719 * 10240; time = 1.5454s; samplesPerSecond = 6626.1 +MPI Rank 2: 05/03/2016 14:46:25: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90451405 * 20480; EvalErrorPrediction = 0.52109375 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.13186s +MPI Rank 2: 05/03/2016 14:46:25: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:35: Action "train" complete. +MPI Rank 2: 05/03/2016 14:46:25: Action "train" complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:35: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 14:46:25: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/run-test b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/run-test index 86de681dd..2cc8a6f2d 100755 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/run-test +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/Parallel1BitQuantization/run-test @@ -8,6 +8,12 @@ LogFileName=stderr Instances=3 NumCPUThreads=$(threadsPerInstance $Instances) +(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) +if [ $? != 0 ]; then + echo Error: Baselines must match original test. Copy from $OriginalTestDir. + exit 1 +fi + # cntkmpirun cntkmpirun "-n $Instances" cntk.cntk "speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=$NumCPUThreads precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]]" ExitCode=$? 
diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt index 9f2767704..cdb9f3028 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt @@ -1,4 +1,4 @@ -=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr ------------------------------------------------------------------- Build info: @@ -35,6 +35,7 @@ Build info: Built by philly on 87698aadbc9d Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux ------------------------------------------------------------------- +Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ------------------------------------------------------------------- Build info: @@ -52,39 +53,38 @@ Build info: Built by philly on 87698aadbc9d Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux ------------------------------------------------------------------- -Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data +MPIWrapper: initializing MPI Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPIWrapper: initializing MPI -MPIWrapper: initializing MPI ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other +ping [requestnodes (before change)]: 3 nodes pinging each other +ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 1 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other +ping [requestnodes (before change)]: 3 nodes pinging each other +ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other +ping [mpihelper]: all 3 nodes responded ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 0 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded ping [mpihelper]: all 3 nodes 
responded -ping [mpihelper]: all 3 nodes responded -05/03/2016 18:04:09: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank0 -05/03/2016 18:04:10: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank1 -05/03/2016 18:04:10: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank2 +05/03/2016 18:02:06: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank0 +05/03/2016 18:02:06: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank1 +05/03/2016 18:02:07: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank2 -------------------------------------------------------------------------- -mpiexec has exited due to process rank 0 with PID 31261 on +mpiexec has exited due to process rank 0 with PID 3329 on node 87698aadbc9d exiting improperly. There are three reasons this could occur: 1. this process did not call "init" before exiting, but others in @@ -107,32 +107,32 @@ terminated by signals sent by mpiexec (as reported here). You can avoid this message by specifying -quiet on the mpiexec command line. -------------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:04:09: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:04:09: Build info: +MPI Rank 0: 05/03/2016 18:02:06: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:02:06: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: Built time: May 3 2016 17:56:15 -MPI Rank 0: 05/03/2016 18:04:09: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 0: 05/03/2016 18:04:09: Build type: release -MPI Rank 0: 05/03/2016 18:04:09: Build target: GPU -MPI Rank 0: 05/03/2016 18:04:09: With 1bit-SGD: yes -MPI Rank 0: 05/03/2016 18:04:09: Math lib: acml -MPI Rank 0: 05/03/2016 18:04:09: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 0: 05/03/2016 18:04:09: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 0: 05/03/2016 18:04:09: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 0: 05/03/2016 18:04:09: Build Branch: HEAD -MPI Rank 0: 05/03/2016 18:04:09: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 0: 05/03/2016 18:04:09: Built by philly on 87698aadbc9d -MPI Rank 0: 05/03/2016 18:04:09: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 0: 05/03/2016 18:04:09: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:02:06: Built time: May 3 2016 17:56:15 +MPI Rank 0: 05/03/2016 18:02:06: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 0: 05/03/2016 18:02:06: Build type: release +MPI Rank 0: 05/03/2016 18:02:06: Build target: GPU +MPI Rank 0: 05/03/2016 18:02:06: With 1bit-SGD: yes +MPI Rank 0: 05/03/2016 18:02:06: Math lib: acml +MPI Rank 0: 05/03/2016 18:02:06: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 0: 05/03/2016 18:02:06: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 0: 05/03/2016 18:02:06: CUDNN_PATH: 
/usr/local/cudnn-4.0 +MPI Rank 0: 05/03/2016 18:02:06: Build Branch: HEAD +MPI Rank 0: 05/03/2016 18:02:06: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 0: 05/03/2016 18:02:06: Built by philly on 87698aadbc9d +MPI Rank 0: 05/03/2016 18:02:06: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 0: 05/03/2016 18:02:06: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: Running on localhost at 2016/05/03 18:04:09 -MPI Rank 0: 05/03/2016 18:04:09: Command line: -MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: 05/03/2016 18:02:06: Running on localhost at 2016/05/03 18:02:06 +MPI Rank 0: 05/03/2016 18:02:06: Command line: +MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:04:09: precision = "float" +MPI Rank 0: 05/03/2016 18:02:06: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:02:06: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -222,14 +222,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -237,18 +235,18 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:02:06: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:04:09: precision = "float" +MPI Rank 0: 05/03/2016 18:02:06: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:02:06: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = -1 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -332,14 +330,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -347,24 +343,24 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:02:06: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:02:06: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN +MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
MPI Rank 0: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:deviceId=-1 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -446,35 +442,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 18:04:09: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 18:04:09: Commands: speechTrain -MPI Rank 0: 05/03/2016 18:04:09: Precision = "double" -MPI Rank 0: 05/03/2016 18:04:09: Using 8 CPU threads. 
-MPI Rank 0: 05/03/2016 18:04:09: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 18:04:09: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 0: 05/03/2016 18:04:09: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 0: 05/03/2016 18:02:06: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:02:06: Commands: speechTrain +MPI Rank 0: 05/03/2016 18:02:06: Precision = "double" +MPI Rank 0: 05/03/2016 18:02:06: Using 8 CPU threads. +MPI Rank 0: 05/03/2016 18:02:06: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 18:02:06: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 0: 05/03/2016 18:02:06: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: ############################################################################## -MPI Rank 0: 05/03/2016 18:04:09: # # -MPI Rank 0: 05/03/2016 18:04:09: # Action "train" # -MPI Rank 0: 05/03/2016 18:04:09: # # -MPI Rank 0: 05/03/2016 18:04:09: ############################################################################## +MPI Rank 0: 05/03/2016 18:02:06: ############################################################################## +MPI Rank 0: 05/03/2016 18:02:06: # # +MPI Rank 0: 05/03/2016 18:02:06: # Action "train" # +MPI Rank 0: 05/03/2016 18:02:06: # # +MPI Rank 0: 05/03/2016 18:02:06: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 18:02:06: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 0: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: Creating virgin network. +MPI Rank 0: 05/03/2016 18:02:06: Creating virgin network. MPI Rank 0: MPI Rank 0: Post-processing network... MPI Rank 0: @@ -526,14 +521,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: Created model with 25 nodes on CPU. 
+MPI Rank 0: 05/03/2016 18:02:06: Created model with 25 nodes on CPU. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: Training criterion node(s): -MPI Rank 0: 05/03/2016 18:04:09: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 18:02:06: Training criterion node(s): +MPI Rank 0: 05/03/2016 18:02:06: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 18:02:06: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 18:02:06: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -541,193 +536,199 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0x2d09578: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0x2d0aca8: {[W2 Value[132 x 512]] } -MPI Rank 0: 0x2d1f848: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 0x2d43af8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0x2d43cb8: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0x2d60438: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0x2da9238: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0x2da93f8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0x2da95b8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 0x2daaa88: {[labels Value[132 x *]] } -MPI Rank 0: 0x2dcf0b8: {[W1 Value[512 x 512]] } -MPI Rank 0: 0x2ddd8c8: {[Prior Value[132]] } -MPI Rank 0: 0x2df6e68: {[features Value[363 x *]] } -MPI Rank 0: 0x2e031f8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0x2e033b8: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0x2e03578: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 0x2e03b58: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0x2e03c48: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0x2e0b828: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0x2e1ca68: {[B0 Value[512 x 1]] } -MPI Rank 0: 0x2e2d618: {[W0*features Value[512 x *]] } -MPI Rank 0: 0x2e2d708: {[LogOfPrior Value[132]] } -MPI Rank 0: 0x2e34e78: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0x2e35038: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0x2e351f8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0x2e41e88: {[W0 Value[512 x 363]] } -MPI Rank 0: 0x2e45cb8: {[B2 Value[132 x 1]] } -MPI Rank 0: 0x2e47c18: {[B1 Value[512 x 1]] } +MPI Rank 0: 0x2626438: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 0x2626ee8: {[Prior Value[132]] } +MPI Rank 0: 0x262cf38: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 0x26a8f78: {[B0 Value[512 x 1]] } +MPI Rank 0: 0x26c5038: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 0x26c6348: {[W1 Value[512 x 512]] } +MPI Rank 0: 0x26d3018: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 0x26d31d8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI 
Rank 0: 0x26d3398: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 0x26d3558: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 0x26d7938: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 0x26fa708: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 0x26fba88: {[W2 Value[132 x 512]] } +MPI Rank 0: 0x2706cb8: {[features Value[363 x *]] } +MPI Rank 0: 0x27294e8: {[B1 Value[512 x 1]] } +MPI Rank 0: 0x27298b8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 0x2729a08: {[W0*features Value[512 x *]] } +MPI Rank 0: 0x272e828: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 0x272e9e8: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 0x2738468: {[W0 Value[512 x 363]] } +MPI Rank 0: 0x273d558: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 0x273d718: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 0x273d8d8: {[B2 Gradient[132 x 1]] } +MPI Rank 0: 0x27484d8: {[labels Value[132 x *]] } +MPI Rank 0: 0x274a128: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 0x274a2e8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 0x274a428: {[LogOfPrior Value[132]] } +MPI Rank 0: 0x274ee18: {[B2 Value[132 x 1]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 18:02:06: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:09: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 18:04:09: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 18:04:09: Prior = Mean() +MPI Rank 0: 05/03/2016 18:02:06: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 18:02:06: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 18:02:06: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:15: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 18:02:07: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:16: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 18:02:08: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:16: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.6199s; samplesPerSecond = 1032.4 -MPI Rank 0: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.3696s; samplesPerSecond = 1731.7 -MPI Rank 0: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.3446s; samplesPerSecond = 1857.2 -MPI Rank 0: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.3520s; samplesPerSecond = 1818.2 -MPI Rank 0: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.3309s; samplesPerSecond = 1934.4 -MPI Rank 0: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3481s; samplesPerSecond = 1838.5 -MPI Rank 0: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.5382s; samplesPerSecond = 1189.1 -MPI Rank 0: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.3800s; samplesPerSecond = 1684.1 -MPI Rank 0: 05/03/2016 18:04:20: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3593s; samplesPerSecond = 1781.3 -MPI Rank 0: 05/03/2016 18:04:20: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3956s; samplesPerSecond = 1618.0 -MPI Rank 0: 05/03/2016 18:04:20: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.3213s; samplesPerSecond = 1992.1 -MPI Rank 0: 05/03/2016 18:04:21: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.5402s; samplesPerSecond = 1184.7 -MPI Rank 0: 05/03/2016 18:04:21: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.3314s; samplesPerSecond = 1931.3 -MPI Rank 0: 05/03/2016 18:04:22: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.3386s; samplesPerSecond = 1889.9 -MPI Rank 0: 05/03/2016 18:04:22: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.2958s; samplesPerSecond = 2164.0 -MPI Rank 0: 05/03/2016 18:04:22: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.3279s; samplesPerSecond = 1951.9 -MPI Rank 0: 05/03/2016 18:04:23: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.3061s; samplesPerSecond = 2091.0 -MPI Rank 0: 05/03/2016 18:04:23: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.5362s; samplesPerSecond = 1193.7 -MPI Rank 0: 05/03/2016 18:04:23: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.3020s; samplesPerSecond = 2119.2 -MPI Rank 0: 05/03/2016 18:04:24: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.3589s; samplesPerSecond = 1783.3 -MPI Rank 0: 05/03/2016 18:04:24: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.3998s; samplesPerSecond = 1600.8 -MPI Rank 0: 05/03/2016 18:04:24: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3304s; samplesPerSecond = 1936.9 -MPI Rank 0: 05/03/2016 18:04:25: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.3395s; samplesPerSecond = 1885.0 -MPI Rank 0: 05/03/2016 18:04:25: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.5208s; samplesPerSecond = 1228.9 -MPI Rank 0: 05/03/2016 18:04:26: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.2931s; samplesPerSecond = 2183.6 -MPI Rank 0: 05/03/2016 18:04:26: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.3645s; samplesPerSecond = 1756.0 -MPI Rank 0: 05/03/2016 18:04:26: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.3727s; samplesPerSecond = 1717.2 -MPI Rank 0: 05/03/2016 18:04:27: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.3552s; samplesPerSecond = 1801.9 -MPI Rank 0: 05/03/2016 18:04:27: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3437s; samplesPerSecond = 1862.2 -MPI Rank 0: 05/03/2016 18:04:28: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.5356s; samplesPerSecond = 1194.9 -MPI Rank 0: 05/03/2016 18:04:28: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.2779s; samplesPerSecond = 2302.6 -MPI Rank 0: 05/03/2016 18:04:28: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.3355s; samplesPerSecond = 1907.7 -MPI Rank 0: 05/03/2016 18:04:28: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=12.225s -MPI Rank 0: 05/03/2016 18:04:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 18:02:08: Starting minibatch loop. 
+MPI Rank 0: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.3502s; samplesPerSecond = 1827.8 +MPI Rank 0: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.3408s; samplesPerSecond = 1878.2 +MPI Rank 0: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.3428s; samplesPerSecond = 1866.8 +MPI Rank 0: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.3817s; samplesPerSecond = 1676.7 +MPI Rank 0: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.3013s; samplesPerSecond = 2124.4 +MPI Rank 0: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.5152s; samplesPerSecond = 1242.3 +MPI Rank 0: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.4012s; samplesPerSecond = 1595.1 +MPI Rank 0: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.3525s; samplesPerSecond = 1815.6 +MPI Rank 0: 05/03/2016 18:02:12: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3342s; samplesPerSecond = 1915.3 +MPI Rank 0: 05/03/2016 18:02:12: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3635s; samplesPerSecond = 1760.6 +MPI Rank 0: 05/03/2016 18:02:12: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.3237s; samplesPerSecond = 1977.2 +MPI Rank 0: 05/03/2016 18:02:13: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.5172s; samplesPerSecond = 1237.3 +MPI Rank 0: 05/03/2016 18:02:13: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.3744s; samplesPerSecond = 1709.4 +MPI Rank 0: 05/03/2016 18:02:14: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.3794s; samplesPerSecond = 1687.0 +MPI Rank 0: 05/03/2016 18:02:14: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.3647s; samplesPerSecond = 1754.9 +MPI Rank 0: 05/03/2016 18:02:14: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.3568s; samplesPerSecond = 1794.0 +MPI Rank 0: 05/03/2016 18:02:15: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.5262s; samplesPerSecond = 1216.2 +MPI Rank 0: 05/03/2016 18:02:15: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.2908s; samplesPerSecond = 2200.7 +MPI Rank 0: 05/03/2016 18:02:15: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.3012s; samplesPerSecond = 2125.1 +MPI Rank 0: 05/03/2016 18:02:16: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.3205s; samplesPerSecond = 1997.1 +MPI Rank 0: 05/03/2016 18:02:16: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.3070s; samplesPerSecond = 2084.6 +MPI Rank 0: 05/03/2016 18:02:16: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3437s; samplesPerSecond = 1862.2 +MPI Rank 0: 05/03/2016 18:02:17: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.3165s; samplesPerSecond = 2022.3 +MPI Rank 0: 05/03/2016 18:02:17: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.5493s; samplesPerSecond = 1165.2 +MPI Rank 0: 05/03/2016 18:02:18: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.3432s; samplesPerSecond = 1864.9 +MPI Rank 0: 05/03/2016 18:02:18: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.3349s; samplesPerSecond = 1911.2 +MPI Rank 0: 05/03/2016 18:02:18: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.3442s; samplesPerSecond = 1859.4 +MPI Rank 0: 05/03/2016 18:02:19: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.3439s; samplesPerSecond = 1860.9 +MPI Rank 0: 05/03/2016 18:02:19: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3966s; samplesPerSecond = 1613.5 +MPI Rank 0: 05/03/2016 18:02:20: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.5417s; samplesPerSecond = 1181.5 +MPI Rank 0: 05/03/2016 18:02:20: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.3347s; samplesPerSecond = 1912.2 +MPI Rank 0: 05/03/2016 18:02:20: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.3345s; samplesPerSecond = 1913.1 +MPI Rank 0: 05/03/2016 18:02:20: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=11.9341s +MPI Rank 0: 05/03/2016 18:02:21: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:29: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 
momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 18:02:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:29: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.018405 +MPI Rank 0: 05/03/2016 18:02:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Actual gradient aggregation time: 0.018134 MPI Rank 0: Async gradient aggregation wait time: 1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.030014 -MPI Rank 0: 05/03/2016 18:04:29: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.11006760 * 2304; EvalErrorPrediction = 0.57161458 * 2304; time = 0.7270s; samplesPerSecond = 3169.0 +MPI Rank 0: Actual gradient aggregation time: 0.022183 +MPI Rank 0: 05/03/2016 18:02:21: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.11006760 * 2304; EvalErrorPrediction = 0.57161458 * 2304; time = 0.5720s; samplesPerSecond = 4027.9 MPI Rank 0: Async gradient aggregation wait time: 1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.045189 +MPI Rank 0: Actual gradient aggregation time: 0.03404 +MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.05465 +MPI Rank 0: 05/03/2016 18:02:22: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08344055 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 0.9286s; samplesPerSecond = 2756.8 MPI Rank 0: Async gradient aggregation wait time: 1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.027735 -MPI Rank 0: 05/03/2016 18:04:30: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08344055 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 0.9141s; samplesPerSecond = 2800.6 +MPI Rank 0: Actual gradient aggregation time: 0.031899 +MPI Rank 0: Async gradient aggregation wait time: 1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.051666 +MPI Rank 0: 05/03/2016 18:02:23: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06587458 * 2560; EvalErrorPrediction = 0.56796875 * 2560; time = 0.8002s; samplesPerSecond = 3199.2 +MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.024361 +MPI Rank 0: Async gradient aggregation wait time: 1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.023041 +MPI Rank 0: 05/03/2016 18:02:24: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10937064 * 2560; EvalErrorPrediction = 0.60859375 * 2560; time = 0.8711s; samplesPerSecond = 2938.7 +MPI Rank 0: Async gradient aggregation wait time: 0.002888 +MPI Rank 0: Actual gradient aggregation time: 0.069783 +MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.037991 +MPI Rank 0: 05/03/2016 18:02:25: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02788461 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.6862s; samplesPerSecond = 3730.8 MPI Rank 0: Async gradient aggregation wait time: 
8e-06 -MPI Rank 0: Actual gradient aggregation time: 0.039128 +MPI Rank 0: Actual gradient aggregation time: 0.028863 MPI Rank 0: Async gradient aggregation wait time: 9e-06 -MPI Rank 0: Actual gradient aggregation time: 0.026087 -MPI Rank 0: 05/03/2016 18:04:31: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06587458 * 2560; EvalErrorPrediction = 0.56796875 * 2560; time = 0.8005s; samplesPerSecond = 3198.1 +MPI Rank 0: Actual gradient aggregation time: 0.023754 +MPI Rank 0: 05/03/2016 18:02:25: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.24576823 * 2560; EvalErrorPrediction = 0.60117188 * 2560; time = 0.7826s; samplesPerSecond = 3271.1 MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.03054 +MPI Rank 0: Actual gradient aggregation time: 0.021724 MPI Rank 0: Async gradient aggregation wait time: 9e-06 -MPI Rank 0: Actual gradient aggregation time: 0.078046 -MPI Rank 0: 05/03/2016 18:04:32: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10937064 * 2560; EvalErrorPrediction = 0.60859375 * 2560; time = 0.9069s; samplesPerSecond = 2822.7 -MPI Rank 0: Async gradient aggregation wait time: 7e-06 -MPI Rank 0: Actual gradient aggregation time: 0.021587 -MPI Rank 0: Async gradient aggregation wait time: 9e-06 -MPI Rank 0: Actual gradient aggregation time: 0.013834 -MPI Rank 0: 05/03/2016 18:04:32: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02788461 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.6063s; samplesPerSecond = 4222.4 -MPI Rank 0: Async gradient aggregation wait time: 7e-06 -MPI Rank 0: Actual gradient aggregation time: 0.032218 -MPI Rank 0: Async gradient aggregation wait time: 0.06601 -MPI Rank 0: Actual gradient aggregation time: 0.087916 -MPI Rank 0: 05/03/2016 18:04:33: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.24576823 * 2560; EvalErrorPrediction = 0.60117188 * 2560; time = 0.7787s; samplesPerSecond = 3287.5 -MPI Rank 0: Async gradient aggregation wait time: 9e-06 -MPI Rank 0: Actual gradient aggregation time: 0.026842 -MPI Rank 0: Async gradient aggregation wait time: 9e-06 -MPI Rank 0: Actual gradient aggregation time: 0.093817 -MPI Rank 0: 05/03/2016 18:04:34: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.15226292 * 2560; EvalErrorPrediction = 0.58125000 * 2560; time = 0.9528s; samplesPerSecond = 2686.7 -MPI Rank 0: Async gradient aggregation wait time: 1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.03452 -MPI Rank 0: Async gradient aggregation wait time: 1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.022839 -MPI Rank 0: 05/03/2016 18:04:35: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.26731511 * 2560; EvalErrorPrediction = 0.62617188 * 2560; time = 0.6696s; samplesPerSecond = 3823.0 -MPI Rank 0: Async gradient aggregation wait time: 0.011139 -MPI Rank 0: Actual gradient aggregation time: 0.038661 -MPI Rank 0: 05/03/2016 18:04:35: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.13592086 * 20480; EvalErrorPrediction = 0.58808594 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.4113s -MPI Rank 0: 05/03/2016 18:04:35: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.2' -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:35: Starting Epoch 3: learning 
rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:35: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.021894 -MPI Rank 0: Async gradient aggregation wait time: 9e-06 -MPI Rank 0: Actual gradient aggregation time: 0.021335 -MPI Rank 0: 05/03/2016 18:04:37: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.38080818 * 9216; EvalErrorPrediction = 0.66710069 * 9216; time = 2.0040s; samplesPerSecond = 4598.8 -MPI Rank 0: Async gradient aggregation wait time: 1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.077879 -MPI Rank 0: Async gradient aggregation wait time: 1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.017313 -MPI Rank 0: 05/03/2016 18:04:39: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.22297658 * 10240; EvalErrorPrediction = 0.60244141 * 10240; time = 2.1899s; samplesPerSecond = 4676.1 -MPI Rank 0: 05/03/2016 18:04:39: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.29018770 * 20480; EvalErrorPrediction = 0.62949219 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.26084s -MPI Rank 0: 05/03/2016 18:04:39: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.3' -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:39: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:39: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 0: Async gradient aggregation wait time: 1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.038526 +MPI Rank 0: Actual gradient aggregation time: 0.021106 +MPI Rank 0: 05/03/2016 18:02:26: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.15226292 * 2560; EvalErrorPrediction = 0.58125000 * 2560; time = 0.8220s; samplesPerSecond = 3114.4 MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.021264 -MPI Rank 0: 05/03/2016 18:04:42: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.06740633 * 9216; EvalErrorPrediction = 0.54676649 * 9216; time = 2.4566s; samplesPerSecond = 3751.6 -MPI Rank 0: Async gradient aggregation wait time: 1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.021984 +MPI Rank 0: Actual gradient aggregation time: 0.068694 +MPI Rank 0: Async gradient aggregation wait time: 9e-06 +MPI Rank 0: Actual gradient aggregation time: 0.020728 +MPI Rank 0: 05/03/2016 18:02:27: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.26731511 * 2560; EvalErrorPrediction = 0.62617188 * 2560; time = 0.6911s; samplesPerSecond = 3704.5 +MPI Rank 0: Async gradient aggregation wait time: 0.155668 +MPI Rank 0: Actual gradient aggregation time: 0.03222 +MPI Rank 0: 05/03/2016 18:02:27: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.13592086 * 20480; EvalErrorPrediction = 0.58808594 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.35303s +MPI Rank 0: 05/03/2016 18:02:27: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.2' +MPI Rank 0: +MPI Rank 0: 05/03/2016 18:02:27: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses +MPI Rank 0: +MPI Rank 0: 05/03/2016 18:02:27: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 -MPI Rank 0: Actual gradient aggregation time: 0.021792 -MPI Rank 0: 05/03/2016 18:04:44: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.03252134 * 10240; EvalErrorPrediction = 0.54667969 * 10240; time = 2.0225s; samplesPerSecond = 5063.0 -MPI Rank 0: Async gradient aggregation wait time: 0.034378 -MPI Rank 0: 05/03/2016 18:04:44: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 2.04741166 * 20480; EvalErrorPrediction = 0.54687500 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=4.53527s -MPI Rank 0: 05/03/2016 18:04:44: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 18:04:44: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: Actual gradient aggregation time: 0.020857 +MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.034159 +MPI Rank 0: 05/03/2016 18:02:29: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.38080818 * 9216; EvalErrorPrediction = 0.66710069 * 9216; time = 2.2915s; samplesPerSecond = 4021.9 +MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.021172 +MPI Rank 0: Async gradient aggregation wait time: 1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.023852 +MPI Rank 0: 05/03/2016 18:02:32: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.22297658 * 10240; EvalErrorPrediction = 0.60244141 * 10240; time = 2.2645s; samplesPerSecond = 4522.1 +MPI Rank 0: 05/03/2016 18:02:32: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.29018770 * 20480; EvalErrorPrediction = 0.62949219 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.59713s +MPI Rank 0: 05/03/2016 18:02:32: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.3' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: Action "train" complete. +MPI Rank 0: 05/03/2016 18:02:32: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: __COMPLETED__ -MPI Rank 1: 05/03/2016 18:04:10: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 18:04:10: Build info: +MPI Rank 0: 05/03/2016 18:02:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 0: Async gradient aggregation wait time: 1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.023914 +MPI Rank 0: Async gradient aggregation wait time: 1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.012821 +MPI Rank 0: 05/03/2016 18:02:34: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.06740633 * 9216; EvalErrorPrediction = 0.54676649 * 9216; time = 2.1974s; samplesPerSecond = 4194.0 +MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.026195 +MPI Rank 0: Async gradient aggregation wait time: 1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.022546 +MPI Rank 0: 05/03/2016 18:02:36: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.03252134 * 10240; EvalErrorPrediction = 0.54667969 * 10240; time = 2.0525s; samplesPerSecond = 4989.1 +MPI Rank 0: Async gradient aggregation wait time: 0.070156 +MPI Rank 0: 05/03/2016 18:02:36: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 2.04741166 * 20480; EvalErrorPrediction = 0.54687500 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=4.44132s +MPI Rank 0: 05/03/2016 18:02:36: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 18:02:36: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: +MPI Rank 0: 05/03/2016 18:02:36: Action "train" complete. +MPI Rank 0: +MPI Rank 0: 05/03/2016 18:02:36: __COMPLETED__ +MPI Rank 1: 05/03/2016 18:02:06: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:02:06: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: Built time: May 3 2016 17:56:15 -MPI Rank 1: 05/03/2016 18:04:10: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 1: 05/03/2016 18:04:10: Build type: release -MPI Rank 1: 05/03/2016 18:04:10: Build target: GPU -MPI Rank 1: 05/03/2016 18:04:10: With 1bit-SGD: yes -MPI Rank 1: 05/03/2016 18:04:10: Math lib: acml -MPI Rank 1: 05/03/2016 18:04:10: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 1: 05/03/2016 18:04:10: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 1: 05/03/2016 18:04:10: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 1: 05/03/2016 18:04:10: Build Branch: HEAD -MPI Rank 1: 05/03/2016 18:04:10: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 1: 05/03/2016 18:04:10: Built by philly on 87698aadbc9d -MPI Rank 1: 05/03/2016 18:04:10: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 1: 05/03/2016 18:04:10: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:02:06: Built time: May 3 2016 17:56:15 +MPI Rank 1: 05/03/2016 18:02:06: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 1: 05/03/2016 18:02:06: Build type: release +MPI Rank 1: 05/03/2016 18:02:06: Build target: GPU +MPI Rank 1: 05/03/2016 18:02:06: With 1bit-SGD: yes +MPI Rank 1: 05/03/2016 18:02:06: Math lib: acml +MPI Rank 1: 05/03/2016 18:02:06: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 1: 05/03/2016 18:02:06: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 1: 05/03/2016 18:02:06: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 1: 05/03/2016 18:02:06: Build Branch: HEAD +MPI Rank 1: 05/03/2016 18:02:06: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 1: 05/03/2016 18:02:06: Built by philly on 87698aadbc9d +MPI Rank 1: 05/03/2016 18:02:06: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 1: 05/03/2016 18:02:06: 
------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: Running on localhost at 2016/05/03 18:04:10 -MPI Rank 1: 05/03/2016 18:04:10: Command line: -MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: 05/03/2016 18:02:06: Running on localhost at 2016/05/03 18:02:06 +MPI Rank 1: 05/03/2016 18:02:06: Command line: +MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:04:10: precision = "float" +MPI Rank 1: 05/03/2016 18:02:06: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:02:06: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -817,14 +818,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -832,18 +831,18 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:02:06: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:04:10: precision = "float" +MPI Rank 1: 05/03/2016 18:02:06: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:02:06: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = -1 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -927,14 +926,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -942,24 +939,24 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:02:06: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:02:06: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN +MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
MPI Rank 1: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:deviceId=-1 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -1041,35 +1038,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 18:04:10: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 18:04:10: Commands: speechTrain -MPI Rank 1: 05/03/2016 18:04:10: Precision = "double" -MPI Rank 1: 05/03/2016 18:04:10: Using 8 CPU threads. 
-MPI Rank 1: 05/03/2016 18:04:10: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 18:04:10: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 1: 05/03/2016 18:04:10: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 1: 05/03/2016 18:02:06: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:02:06: Commands: speechTrain +MPI Rank 1: 05/03/2016 18:02:06: Precision = "double" +MPI Rank 1: 05/03/2016 18:02:06: Using 8 CPU threads. +MPI Rank 1: 05/03/2016 18:02:06: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 18:02:06: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 1: 05/03/2016 18:02:06: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: ############################################################################## -MPI Rank 1: 05/03/2016 18:04:10: # # -MPI Rank 1: 05/03/2016 18:04:10: # Action "train" # -MPI Rank 1: 05/03/2016 18:04:10: # # -MPI Rank 1: 05/03/2016 18:04:10: ############################################################################## +MPI Rank 1: 05/03/2016 18:02:06: ############################################################################## +MPI Rank 1: 05/03/2016 18:02:06: # # +MPI Rank 1: 05/03/2016 18:02:06: # Action "train" # +MPI Rank 1: 05/03/2016 18:02:06: # # +MPI Rank 1: 05/03/2016 18:02:06: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 18:02:06: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 1: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: Creating virgin network. +MPI Rank 1: 05/03/2016 18:02:06: Creating virgin network. MPI Rank 1: MPI Rank 1: Post-processing network... MPI Rank 1: @@ -1121,14 +1117,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: Created model with 25 nodes on CPU. 
+MPI Rank 1: 05/03/2016 18:02:06: Created model with 25 nodes on CPU. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: Training criterion node(s): -MPI Rank 1: 05/03/2016 18:04:10: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 18:02:06: Training criterion node(s): +MPI Rank 1: 05/03/2016 18:02:06: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 18:02:06: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 18:02:06: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1136,189 +1132,195 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 0x2383328: {[B2 Value[132 x 1]] } -MPI Rank 1: 0x2385f48: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 0x2388228: {[W1 Value[512 x 512]] } -MPI Rank 1: 0x23920e8: {[W2 Value[132 x 512]] } -MPI Rank 1: 0x239af18: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 0x239b078: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 0x239b238: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 0x23a3a68: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 0x23a3bd8: {[LogOfPrior Value[132]] } -MPI Rank 1: 0x23af368: {[labels Value[132 x *]] } -MPI Rank 1: 0x23b6968: {[W0 Value[512 x 363]] } -MPI Rank 1: 0x23dc0e8: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 0x23dc2a8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 0x23dc468: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 0x24348d8: {[B0 Value[512 x 1]] } -MPI Rank 1: 0x245dec8: {[W0*features Value[512 x *]] } -MPI Rank 1: 0x245e028: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 0x245e1e8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 0x245e3a8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 0x247fab8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 0x2495a68: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 0x249ae68: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 0x249b028: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 0x249b1e8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 0x249fe88: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 0x24b2598: {[features Value[363 x *]] } -MPI Rank 1: 0x24b28d8: {[B1 Value[512 x 1]] } -MPI Rank 1: 0x24bca98: {[Prior Value[132]] } +MPI Rank 1: 0x1f78068: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 0x1f78a98: {[Prior Value[132]] } +MPI Rank 1: 0x1f7bfb8: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 0x1fd5138: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 0x1fd52f8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 0x2016398: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 0x2016558: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 0x2017eb8: {[W1 Value[512 x 512]] 
} +MPI Rank 1: 0x2024c48: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 0x2024e08: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 0x2024fc8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 0x20588e8: {[features Value[363 x *]] } +MPI Rank 1: 0x207b0b8: {[B0 Value[512 x 1]] } +MPI Rank 1: 0x207b1f8: {[W0*features Value[512 x *]] } +MPI Rank 1: 0x207b408: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 0x207b5c8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 0x207b788: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 0x207b948: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 0x20800a8: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 0x2080298: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 0x2080458: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 0x208a098: {[W0 Value[512 x 363]] } +MPI Rank 1: 0x2096a88: {[W2 Value[132 x 512]] } +MPI Rank 1: 0x209a0c8: {[labels Value[132 x *]] } +MPI Rank 1: 0x209aab8: {[B1 Value[512 x 1]] } +MPI Rank 1: 0x209bd28: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 0x209bdd8: {[LogOfPrior Value[132]] } +MPI Rank 1: 0x20a0988: {[B2 Value[132 x 1]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 18:02:06: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:10: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 18:04:10: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 18:04:10: Prior = Mean() +MPI Rank 1: 05/03/2016 18:02:06: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 18:02:06: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 18:02:06: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:12: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 18:02:07: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:16: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 18:02:08: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:16: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 18:04:16: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.1894s; samplesPerSecond = 3380.0 -MPI Rank 1: 05/03/2016 18:04:16: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.1044s; samplesPerSecond = 6131.7 -MPI Rank 1: 05/03/2016 18:04:16: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1088s; samplesPerSecond = 5881.3 -MPI Rank 1: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.1045s; samplesPerSecond = 6127.2 -MPI Rank 1: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.1044s; samplesPerSecond = 6130.2 -MPI Rank 1: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1046s; samplesPerSecond = 6115.7 -MPI Rank 1: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.1043s; samplesPerSecond = 6133.7 -MPI Rank 1: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.1043s; samplesPerSecond = 6138.8 -MPI Rank 1: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1041s; samplesPerSecond = 6145.2 -MPI Rank 1: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.1048s; samplesPerSecond = 6104.0 -MPI Rank 1: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.1043s; samplesPerSecond = 6135.6 -MPI Rank 1: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.1041s; samplesPerSecond = 6145.0 -MPI Rank 1: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.1041s; samplesPerSecond = 6148.8 -MPI Rank 1: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.1045s; samplesPerSecond = 6127.3 -MPI Rank 1: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.1043s; samplesPerSecond = 6135.0 -MPI Rank 1: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.1055s; samplesPerSecond = 6067.1 -MPI Rank 1: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1039s; samplesPerSecond = 6160.4 -MPI Rank 1: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.1048s; samplesPerSecond = 6107.7 -MPI Rank 1: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.1046s; samplesPerSecond = 6119.9 -MPI Rank 1: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.1044s; samplesPerSecond = 6128.9 -MPI Rank 1: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.1043s; samplesPerSecond = 6135.4 -MPI Rank 1: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.1047s; samplesPerSecond = 6115.3 -MPI Rank 1: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.1045s; samplesPerSecond = 6126.6 -MPI Rank 1: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1044s; samplesPerSecond = 6133.0 -MPI Rank 1: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.1044s; samplesPerSecond = 6130.4 -MPI Rank 1: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.1053s; samplesPerSecond = 6079.6 -MPI Rank 1: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.1046s; samplesPerSecond = 6116.9 -MPI Rank 1: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.1062s; samplesPerSecond = 6028.7 -MPI Rank 1: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.1045s; samplesPerSecond = 6124.6 -MPI Rank 1: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.1072s; samplesPerSecond = 5968.4 -MPI Rank 1: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.1043s; samplesPerSecond = 6136.7 -MPI Rank 1: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.1042s; samplesPerSecond = 6139.4 -MPI Rank 1: 05/03/2016 18:04:19: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.48105s +MPI Rank 1: 05/03/2016 18:02:08: Starting minibatch loop. 
+MPI Rank 1: 05/03/2016 18:02:08: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.1097s; samplesPerSecond = 5835.6 +MPI Rank 1: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.1048s; samplesPerSecond = 6105.2 +MPI Rank 1: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1050s; samplesPerSecond = 6095.8 +MPI Rank 1: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.1055s; samplesPerSecond = 6065.2 +MPI Rank 1: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.1052s; samplesPerSecond = 6085.5 +MPI Rank 1: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1051s; samplesPerSecond = 6089.4 +MPI Rank 1: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.1072s; samplesPerSecond = 5967.6 +MPI Rank 1: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.1052s; samplesPerSecond = 6082.1 +MPI Rank 1: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1051s; samplesPerSecond = 6087.6 +MPI Rank 1: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.1050s; samplesPerSecond = 6095.9 +MPI Rank 1: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.1054s; samplesPerSecond = 6070.6 +MPI Rank 1: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.1052s; samplesPerSecond = 6081.9 +MPI Rank 1: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.1054s; samplesPerSecond = 6074.8 +MPI Rank 1: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.1057s; samplesPerSecond = 6054.5 +MPI Rank 1: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.1052s; samplesPerSecond = 6081.0 +MPI Rank 1: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.1051s; samplesPerSecond = 6090.0 +MPI Rank 1: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1057s; samplesPerSecond = 6053.5 +MPI Rank 1: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.1052s; samplesPerSecond = 6080.9 +MPI Rank 1: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.1046s; samplesPerSecond = 6118.6 +MPI Rank 1: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.1060s; samplesPerSecond = 6040.0 +MPI Rank 1: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.1076s; samplesPerSecond = 5945.6 +MPI Rank 1: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.1057s; samplesPerSecond = 6055.2 +MPI Rank 1: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.1066s; samplesPerSecond = 6005.8 +MPI Rank 1: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1080s; samplesPerSecond = 5928.0 +MPI Rank 1: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.1075s; samplesPerSecond = 5954.1 +MPI Rank 1: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.1074s; samplesPerSecond = 5956.6 +MPI Rank 1: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.1074s; samplesPerSecond = 5956.6 +MPI Rank 1: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.1083s; samplesPerSecond = 5909.2 +MPI Rank 1: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.1074s; samplesPerSecond = 5959.9 +MPI Rank 1: 05/03/2016 18:02:12: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.1070s; samplesPerSecond = 5983.9 +MPI Rank 1: 05/03/2016 18:02:12: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.1075s; samplesPerSecond = 5955.6 +MPI Rank 1: 05/03/2016 18:02:12: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.1068s; samplesPerSecond = 5993.5 +MPI Rank 1: 05/03/2016 18:02:12: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.40411s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:29: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 18:02:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:29: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.090824 -MPI Rank 1: Async gradient aggregation wait time: 0.054673 -MPI Rank 1: Actual gradient aggregation time: 0.066804 -MPI Rank 1: 05/03/2016 18:04:29: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.11006760 * 2304; EvalErrorPrediction = 0.57161458 * 2304; time = 0.6778s; samplesPerSecond = 3399.4 -MPI Rank 1: Async gradient aggregation wait time: 0.060413 -MPI Rank 1: Actual gradient aggregation time: 0.073654 -MPI Rank 1: Async gradient aggregation wait time: 0.084949 -MPI Rank 1: Actual gradient aggregation time: 0.040694 -MPI Rank 1: 05/03/2016 18:04:30: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08344055 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 0.9418s; samplesPerSecond = 2718.3 -MPI Rank 1: Async gradient aggregation wait time: 0.05278 -MPI Rank 1: Actual gradient aggregation time: 0.040451 -MPI Rank 1: Async gradient aggregation wait time: 0.069296 -MPI Rank 1: Actual gradient aggregation time: 0.094717 -MPI Rank 1: 05/03/2016 18:04:31: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06587458 * 2560; EvalErrorPrediction = 0.56796875 * 2560; time = 0.7911s; samplesPerSecond = 3236.1 -MPI Rank 1: Async gradient aggregation wait time: 0.053334 -MPI Rank 1: Actual gradient aggregation time: 0.035653 -MPI Rank 1: Async gradient aggregation wait time: 0.035189 -MPI Rank 1: Actual gradient aggregation time: 0.078759 -MPI Rank 1: 05/03/2016 18:04:32: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10937064 * 2560; EvalErrorPrediction = 0.60859375 * 2560; time = 0.6557s; samplesPerSecond = 3904.2 -MPI Rank 1: Async gradient aggregation wait time: 0.299382 -MPI Rank 1: Actual gradient aggregation time: 0.048604 -MPI Rank 1: Async gradient aggregation wait time: 0.052967 -MPI Rank 1: Actual gradient aggregation time: 0.039171 -MPI Rank 1: 05/03/2016 18:04:32: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02788461 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.8837s; samplesPerSecond = 2896.9 -MPI Rank 1: Async gradient aggregation wait time: 0.031398 -MPI Rank 1: Actual gradient aggregation time: 0.058835 -MPI Rank 1: Async gradient aggregation wait time: 0.19318 -MPI Rank 1: Actual gradient aggregation time: 0.039099 -MPI Rank 1: 05/03/2016 18:04:33: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.24576823 * 2560; EvalErrorPrediction = 0.60117188 * 2560; time = 0.7550s; samplesPerSecond = 3390.9 -MPI Rank 1: Async gradient aggregation wait time: 0.051484 -MPI Rank 1: Actual gradient aggregation time: 0.045649 -MPI Rank 1: Async gradient aggregation wait time: 0.050666 -MPI Rank 1: Actual gradient aggregation time: 0.105925 -MPI Rank 1: 05/03/2016 18:04:34: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.15226292 * 2560; EvalErrorPrediction = 0.58125000 * 2560; time = 0.9070s; samplesPerSecond = 2822.5 -MPI Rank 1: Async gradient aggregation wait time: 0.10386 -MPI Rank 1: Actual gradient aggregation time: 0.081907 -MPI Rank 1: Async gradient aggregation wait time: 0.0659 -MPI Rank 1: 
Actual gradient aggregation time: 0.083507 -MPI Rank 1: 05/03/2016 18:04:35: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.26731511 * 2560; EvalErrorPrediction = 0.62617188 * 2560; time = 0.6961s; samplesPerSecond = 3677.4 -MPI Rank 1: Async gradient aggregation wait time: 0.059794 -MPI Rank 1: Actual gradient aggregation time: 0.040385 -MPI Rank 1: 05/03/2016 18:04:35: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.13592086 * 20480; EvalErrorPrediction = 0.58808594 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.41135s +MPI Rank 1: 05/03/2016 18:02:21: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Actual gradient aggregation time: 0.05876 +MPI Rank 1: Async gradient aggregation wait time: 0.045616 +MPI Rank 1: Actual gradient aggregation time: 0.057555 +MPI Rank 1: 05/03/2016 18:02:21: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.11006760 * 2304; EvalErrorPrediction = 0.57161458 * 2304; time = 0.5356s; samplesPerSecond = 4301.7 +MPI Rank 1: Async gradient aggregation wait time: 0.050643 +MPI Rank 1: Actual gradient aggregation time: 0.25128 +MPI Rank 1: Async gradient aggregation wait time: 0.065594 +MPI Rank 1: Actual gradient aggregation time: 0.066846 +MPI Rank 1: 05/03/2016 18:02:22: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08344055 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 0.9252s; samplesPerSecond = 2767.0 +MPI Rank 1: Async gradient aggregation wait time: 0.040743 +MPI Rank 1: Actual gradient aggregation time: 0.090699 +MPI Rank 1: Async gradient aggregation wait time: 0.025597 +MPI Rank 1: Actual gradient aggregation time: 0.070096 +MPI Rank 1: 05/03/2016 18:02:23: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06587458 * 2560; EvalErrorPrediction = 0.56796875 * 2560; time = 0.7812s; samplesPerSecond = 3277.0 +MPI Rank 1: Async gradient aggregation wait time: 0.058369 +MPI Rank 1: Actual gradient aggregation time: 0.063514 +MPI Rank 1: Async gradient aggregation wait time: 0.03537 +MPI Rank 1: Actual gradient aggregation time: 0.101552 +MPI Rank 1: 05/03/2016 18:02:24: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10937064 * 2560; EvalErrorPrediction = 0.60859375 * 2560; time = 0.8902s; samplesPerSecond = 2875.8 +MPI Rank 1: Async gradient aggregation wait time: 0.081955 +MPI Rank 1: Actual gradient aggregation time: 0.05362 +MPI Rank 1: Async gradient aggregation wait time: 0.051024 +MPI Rank 1: Actual gradient aggregation time: 0.069824 +MPI Rank 1: 05/03/2016 18:02:25: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02788461 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.7034s; samplesPerSecond = 3639.4 +MPI Rank 1: Async gradient aggregation wait time: 0.051375 +MPI Rank 1: Actual gradient aggregation time: 0.087201 +MPI Rank 1: Async gradient aggregation wait time: 0.053592 +MPI Rank 1: Actual gradient aggregation time: 0.054472 +MPI Rank 1: 05/03/2016 18:02:25: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.24576823 * 2560; EvalErrorPrediction = 0.60117188 * 2560; time = 0.7669s; samplesPerSecond = 3338.0 +MPI Rank 1: Async gradient aggregation wait time: 0.049303 +MPI Rank 1: Actual gradient aggregation time: 0.061043 +MPI Rank 1: Async gradient aggregation wait time: 0.088495 +MPI 
Rank 1: Actual gradient aggregation time: 0.195807 +MPI Rank 1: 05/03/2016 18:02:26: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.15226292 * 2560; EvalErrorPrediction = 0.58125000 * 2560; time = 0.8384s; samplesPerSecond = 3053.4 +MPI Rank 1: Async gradient aggregation wait time: 0.032181 +MPI Rank 1: Actual gradient aggregation time: 0.091144 +MPI Rank 1: Async gradient aggregation wait time: 0.045033 +MPI Rank 1: Actual gradient aggregation time: 0.067563 +MPI Rank 1: 05/03/2016 18:02:27: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.26731511 * 2560; EvalErrorPrediction = 0.62617188 * 2560; time = 0.6779s; samplesPerSecond = 3776.6 +MPI Rank 1: Async gradient aggregation wait time: 0.189937 +MPI Rank 1: Actual gradient aggregation time: 0.041092 +MPI Rank 1: 05/03/2016 18:02:27: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.13592086 * 20480; EvalErrorPrediction = 0.58808594 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.35292s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:35: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 18:02:27: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:35: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.12569 -MPI Rank 1: Actual gradient aggregation time: 0.173699 -MPI Rank 1: Async gradient aggregation wait time: 0.138334 -MPI Rank 1: Actual gradient aggregation time: 0.188424 -MPI Rank 1: 05/03/2016 18:04:37: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.38080818 * 9216; EvalErrorPrediction = 0.66710069 * 9216; time = 1.7956s; samplesPerSecond = 5132.5 -MPI Rank 1: Async gradient aggregation wait time: 0.103505 -MPI Rank 1: Actual gradient aggregation time: 0.228216 -MPI Rank 1: Async gradient aggregation wait time: 0.161605 -MPI Rank 1: Actual gradient aggregation time: 0.189366 -MPI Rank 1: 05/03/2016 18:04:39: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.22297658 * 10240; EvalErrorPrediction = 0.60244141 * 10240; time = 2.1655s; samplesPerSecond = 4728.7 -MPI Rank 1: 05/03/2016 18:04:39: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.29018770 * 20480; EvalErrorPrediction = 0.62949219 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.26366s +MPI Rank 1: 05/03/2016 18:02:27: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 1: Async gradient aggregation wait time: 0.17642 +MPI Rank 1: Actual gradient aggregation time: 0.359079 +MPI Rank 1: Async gradient aggregation wait time: 0.189325 +MPI Rank 1: Actual gradient aggregation time: 0.215196 +MPI Rank 1: 05/03/2016 18:02:29: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.38080818 * 9216; EvalErrorPrediction = 0.66710069 * 9216; time = 2.1092s; samplesPerSecond = 4369.5 +MPI Rank 1: Async gradient aggregation wait time: 0.233182 +MPI Rank 1: Actual gradient aggregation time: 0.280567 +MPI Rank 1: Async gradient aggregation wait time: 0.206845 +MPI Rank 1: Actual gradient aggregation time: 0.160801 +MPI Rank 1: 05/03/2016 18:02:32: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.22297658 * 10240; EvalErrorPrediction = 0.60244141 * 10240; time = 2.2900s; samplesPerSecond = 4471.5 +MPI Rank 1: 05/03/2016 18:02:32: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.29018770 * 20480; EvalErrorPrediction = 0.62949219 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.5971s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:39: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 18:02:32: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:39: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.219998 -MPI Rank 1: Actual gradient aggregation time: 0.278086 -MPI Rank 1: Async gradient aggregation wait time: 0.155825 -MPI Rank 1: Actual gradient aggregation time: 0.166478 -MPI Rank 1: 05/03/2016 18:04:42: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.06740633 * 9216; EvalErrorPrediction = 0.54676649 * 9216; time = 2.2195s; samplesPerSecond = 4152.3 -MPI Rank 1: Async gradient aggregation wait time: 0.182414 -MPI Rank 1: Actual gradient aggregation time: 0.197069 -MPI Rank 1: Async gradient aggregation wait time: 0.095156 -MPI Rank 1: Actual gradient aggregation time: 0.228056 -MPI Rank 1: 05/03/2016 18:04:44: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.03252134 * 10240; EvalErrorPrediction = 0.54667969 * 10240; time = 2.1128s; samplesPerSecond = 4846.7 -MPI Rank 1: Async gradient aggregation wait time: 0.020919 -MPI Rank 1: 05/03/2016 18:04:44: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 2.04741166 * 20480; EvalErrorPrediction = 0.54687500 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=4.52146s -MPI Rank 1: 05/03/2016 18:04:44: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 18:02:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 1: Async gradient aggregation wait time: 0.102873 +MPI Rank 1: Actual gradient aggregation time: 0.166712 +MPI Rank 1: Async gradient aggregation wait time: 0.204005 +MPI Rank 1: Actual gradient aggregation time: 0.212461 +MPI Rank 1: 05/03/2016 18:02:34: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.06740633 * 9216; EvalErrorPrediction = 0.54676649 * 9216; time = 2.0858s; samplesPerSecond = 4418.4 +MPI Rank 1: Async gradient aggregation wait time: 0.231732 +MPI Rank 1: Actual gradient aggregation time: 0.1824 +MPI Rank 1: Async gradient aggregation wait time: 0.119357 +MPI Rank 1: Actual gradient aggregation time: 0.126232 +MPI Rank 1: 05/03/2016 18:02:36: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.03252134 * 10240; EvalErrorPrediction = 0.54667969 * 10240; time = 2.1719s; samplesPerSecond = 4714.8 +MPI Rank 1: Async gradient aggregation wait time: 0.050646 +MPI Rank 1: 05/03/2016 18:02:36: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 2.04741166 * 20480; EvalErrorPrediction = 0.54687500 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=4.42166s +MPI Rank 1: 05/03/2016 18:02:36: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:44: Action "train" complete. +MPI Rank 1: 05/03/2016 18:02:36: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:44: __COMPLETED__ -MPI Rank 2: 05/03/2016 18:04:10: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 18:04:10: Build info: +MPI Rank 1: 05/03/2016 18:02:36: __COMPLETED__ +MPI Rank 2: 05/03/2016 18:02:07: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:02:07: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:10: Built time: May 3 2016 17:56:15 -MPI Rank 2: 05/03/2016 18:04:10: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 2: 05/03/2016 18:04:10: Build type: release -MPI Rank 2: 05/03/2016 18:04:10: Build target: GPU -MPI Rank 2: 05/03/2016 18:04:10: With 1bit-SGD: yes -MPI Rank 2: 05/03/2016 18:04:10: Math lib: acml -MPI Rank 2: 05/03/2016 18:04:10: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 2: 05/03/2016 18:04:10: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 2: 05/03/2016 18:04:10: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 2: 05/03/2016 18:04:10: Build Branch: HEAD -MPI Rank 2: 05/03/2016 18:04:10: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 2: 05/03/2016 18:04:10: Built by philly on 87698aadbc9d -MPI Rank 2: 05/03/2016 18:04:10: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 2: 05/03/2016 18:04:10: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:02:07: Built time: May 3 2016 17:56:15 +MPI Rank 2: 05/03/2016 18:02:07: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 2: 05/03/2016 18:02:07: Build type: release +MPI Rank 2: 05/03/2016 18:02:07: Build target: GPU +MPI Rank 2: 05/03/2016 18:02:07: With 1bit-SGD: yes +MPI Rank 2: 05/03/2016 18:02:07: Math lib: acml +MPI Rank 2: 05/03/2016 18:02:07: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 2: 05/03/2016 18:02:07: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 2: 05/03/2016 18:02:07: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 2: 05/03/2016 18:02:07: Build Branch: HEAD +MPI Rank 2: 05/03/2016 18:02:07: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 2: 05/03/2016 18:02:07: Built by philly on 87698aadbc9d +MPI Rank 2: 05/03/2016 18:02:07: Build Path: 
/home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 2: 05/03/2016 18:02:07: ------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:10: Running on localhost at 2016/05/03 18:04:10 -MPI Rank 2: 05/03/2016 18:04:10: Command line: -MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: 05/03/2016 18:02:07: Running on localhost at 2016/05/03 18:02:07 +MPI Rank 2: 05/03/2016 18:02:07: Command line: +MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:10: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:04:10: precision = "float" +MPI Rank 2: 05/03/2016 18:02:07: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:02:07: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1408,14 +1410,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -1423,18 +1423,18 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:10: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:02:07: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:10: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:04:10: precision = "float" +MPI Rank 2: 05/03/2016 18:02:07: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:02:07: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = -1 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1518,14 +1518,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -1533,24 +1531,24 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:10: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:02:07: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:10: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:02:07: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN +MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
MPI Rank 2: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:deviceId=-1 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1632,35 +1630,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 18:04:10: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 18:04:10: Commands: speechTrain -MPI Rank 2: 05/03/2016 18:04:10: Precision = "double" -MPI Rank 2: 05/03/2016 18:04:10: Using 8 CPU threads. 
-MPI Rank 2: 05/03/2016 18:04:10: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 18:04:10: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 2: 05/03/2016 18:04:10: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 2: 05/03/2016 18:02:07: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:02:07: Commands: speechTrain +MPI Rank 2: 05/03/2016 18:02:07: Precision = "double" +MPI Rank 2: 05/03/2016 18:02:07: Using 8 CPU threads. +MPI Rank 2: 05/03/2016 18:02:07: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 18:02:07: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 2: 05/03/2016 18:02:07: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:10: ############################################################################## -MPI Rank 2: 05/03/2016 18:04:10: # # -MPI Rank 2: 05/03/2016 18:04:10: # Action "train" # -MPI Rank 2: 05/03/2016 18:04:10: # # -MPI Rank 2: 05/03/2016 18:04:10: ############################################################################## +MPI Rank 2: 05/03/2016 18:02:07: ############################################################################## +MPI Rank 2: 05/03/2016 18:02:07: # # +MPI Rank 2: 05/03/2016 18:02:07: # Action "train" # +MPI Rank 2: 05/03/2016 18:02:07: # # +MPI Rank 2: 05/03/2016 18:02:07: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:10: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 18:02:07: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 2: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:10: Creating virgin network. +MPI Rank 2: 05/03/2016 18:02:07: Creating virgin network. MPI Rank 2: MPI Rank 2: Post-processing network... MPI Rank 2: @@ -1712,14 +1709,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:11: Created model with 25 nodes on CPU. 
+MPI Rank 2: 05/03/2016 18:02:07: Created model with 25 nodes on CPU. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:11: Training criterion node(s): -MPI Rank 2: 05/03/2016 18:04:11: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 18:02:07: Training criterion node(s): +MPI Rank 2: 05/03/2016 18:02:07: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:11: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 18:02:07: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:11: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 18:02:07: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1727,160 +1724,166 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0x1dc4148: {[B2 Value[132 x 1]] } -MPI Rank 2: 0x1dc58a8: {[features Value[363 x *]] } -MPI Rank 2: 0x1dc6c48: {[B0 Value[512 x 1]] } -MPI Rank 2: 0x1dc9db8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 0x1dc9fc8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 0x1dd3458: {[W0 Value[512 x 363]] } -MPI Rank 2: 0x1e638c8: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 0x1e6ed58: {[W2 Value[132 x 512]] } -MPI Rank 2: 0x1e98398: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 0x1e98808: {[W1 Value[512 x 512]] } -MPI Rank 2: 0x1e9a568: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 0x1eace38: {[labels Value[132 x *]] } -MPI Rank 2: 0x1ebe848: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 0x1ebea08: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 0x1ed4ad8: {[B1 Value[512 x 1]] } -MPI Rank 2: 0x1ed9c18: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 0x1ed9d78: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 0x1ed9f38: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 0x1edee28: {[Prior Value[132]] } -MPI Rank 2: 0x1ee8218: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 0x1ee83d8: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 0x1ef7298: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 0x1ef7458: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 0x1ef7618: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 0x1efc7c8: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 0x1efc988: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 0x1efd0b8: {[W0*features Value[512 x *]] } -MPI Rank 2: 0x1efd178: {[LogOfPrior Value[132]] } +MPI Rank 2: 0x1c8cf28: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 0x1ce6018: {[Prior Value[132]] } +MPI Rank 2: 0x1cee238: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 0x1cee3f8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 0x1cee5b8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 0x1cee778: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } 
+MPI Rank 2: 0x1d08fe8: {[W0 Value[512 x 363]] } +MPI Rank 2: 0x1d263b8: {[B1 Value[512 x 1]] } +MPI Rank 2: 0x1d33048: {[LogOfPrior Value[132]] } +MPI Rank 2: 0x1d3db18: {[W1 Value[512 x 512]] } +MPI Rank 2: 0x1d49048: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 0x1d5bbb8: {[B2 Value[132 x 1]] } +MPI Rank 2: 0x1d66d28: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 0x1d6caf8: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 0x1d6ccb8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 0x1d6ce78: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 0x1d8db78: {[W0*features Value[512 x *]] } +MPI Rank 2: 0x1d8dd88: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 0x1d8df48: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 0x1d8e108: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 0x1d8e378: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 0x1d8e4d8: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 0x1d8e698: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 0x1d9e788: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 0x1da6768: {[features Value[363 x *]] } +MPI Rank 2: 0x1da9418: {[labels Value[132 x *]] } +MPI Rank 2: 0x1dab758: {[W2 Value[132 x 512]] } +MPI Rank 2: 0x1db1568: {[B0 Value[512 x 1]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:11: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 18:02:07: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:11: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 18:04:11: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 18:04:11: Prior = Mean() +MPI Rank 2: 05/03/2016 18:02:07: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 18:02:07: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 18:02:07: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:16: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 18:02:08: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:16: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 18:02:08: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:16: Starting minibatch loop. 
-MPI Rank 2: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.5100s; samplesPerSecond = 1255.0 -MPI Rank 2: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.3359s; samplesPerSecond = 1905.5 -MPI Rank 2: 05/03/2016 18:04:17: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.3439s; samplesPerSecond = 1861.0 -MPI Rank 2: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.5371s; samplesPerSecond = 1191.6 -MPI Rank 2: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.3495s; samplesPerSecond = 1831.0 -MPI Rank 2: 05/03/2016 18:04:18: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3772s; samplesPerSecond = 1696.7 -MPI Rank 2: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.3853s; samplesPerSecond = 1661.1 -MPI Rank 2: 05/03/2016 18:04:19: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.3853s; samplesPerSecond = 1661.1 -MPI Rank 2: 05/03/2016 18:04:20: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.5108s; samplesPerSecond = 1253.0 -MPI Rank 2: 05/03/2016 18:04:20: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.4080s; samplesPerSecond = 1568.7 -MPI Rank 2: 05/03/2016 18:04:21: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.3631s; samplesPerSecond = 1762.7 -MPI Rank 2: 05/03/2016 18:04:21: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3488s; samplesPerSecond = 1835.0 -MPI Rank 2: 05/03/2016 18:04:21: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.3339s; samplesPerSecond = 1917.0 -MPI Rank 2: 05/03/2016 18:04:22: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.3703s; samplesPerSecond = 1728.4 -MPI Rank 2: 05/03/2016 18:04:22: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.5477s; samplesPerSecond = 1168.5 -MPI Rank 2: 05/03/2016 18:04:22: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.3584s; samplesPerSecond = 1785.7 -MPI Rank 2: 05/03/2016 18:04:23: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.3584s; samplesPerSecond = 1785.9 -MPI Rank 2: 05/03/2016 18:04:23: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3501s; samplesPerSecond = 1828.0 -MPI Rank 2: 05/03/2016 18:04:24: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.3287s; samplesPerSecond = 1947.1 -MPI Rank 2: 05/03/2016 18:04:24: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.3533s; samplesPerSecond = 1811.3 -MPI Rank 2: 05/03/2016 18:04:24: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.5770s; samplesPerSecond = 1109.1 -MPI Rank 2: 05/03/2016 18:04:25: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.2910s; samplesPerSecond = 2199.3 -MPI Rank 2: 05/03/2016 18:04:25: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.3390s; samplesPerSecond = 1888.0 -MPI Rank 2: 05/03/2016 18:04:25: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.3494s; samplesPerSecond = 1831.9 -MPI Rank 2: 05/03/2016 18:04:26: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.3276s; samplesPerSecond = 1953.7 -MPI Rank 2: 05/03/2016 18:04:26: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.5656s; samplesPerSecond = 1131.6 -MPI Rank 2: 05/03/2016 18:04:27: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.3093s; samplesPerSecond = 2069.0 -MPI Rank 2: 05/03/2016 18:04:27: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.3421s; samplesPerSecond = 1871.0 -MPI Rank 2: 05/03/2016 18:04:27: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3498s; samplesPerSecond = 1829.6 -MPI Rank 2: 05/03/2016 18:04:28: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3240s; samplesPerSecond = 1975.5 -MPI Rank 2: 05/03/2016 18:04:28: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.3592s; samplesPerSecond = 1781.6 -MPI Rank 2: 05/03/2016 18:04:28: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.4314s; samplesPerSecond = 1483.6 -MPI Rank 2: 05/03/2016 18:04:28: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=12.493s +MPI Rank 2: 05/03/2016 18:02:08: Starting minibatch loop. 
+MPI Rank 2: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.3117s; samplesPerSecond = 2053.4 +MPI Rank 2: 05/03/2016 18:02:09: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.3748s; samplesPerSecond = 1707.5 +MPI Rank 2: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.5590s; samplesPerSecond = 1144.8 +MPI Rank 2: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.3607s; samplesPerSecond = 1774.5 +MPI Rank 2: 05/03/2016 18:02:10: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.3301s; samplesPerSecond = 1938.6 +MPI Rank 2: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3359s; samplesPerSecond = 1905.3 +MPI Rank 2: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.3273s; samplesPerSecond = 1955.2 +MPI Rank 2: 05/03/2016 18:02:11: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.3498s; samplesPerSecond = 1829.4 +MPI Rank 2: 05/03/2016 18:02:12: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.5444s; samplesPerSecond = 1175.7 +MPI Rank 2: 05/03/2016 18:02:12: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3267s; samplesPerSecond = 1958.9 +MPI Rank 2: 05/03/2016 18:02:13: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.3750s; samplesPerSecond = 1706.6 +MPI Rank 2: 05/03/2016 18:02:13: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3823s; samplesPerSecond = 1673.9 +MPI Rank 2: 05/03/2016 18:02:13: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.3130s; samplesPerSecond = 2044.4 +MPI Rank 2: 05/03/2016 18:02:14: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.3039s; samplesPerSecond = 2105.6 +MPI Rank 2: 05/03/2016 18:02:14: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.5575s; samplesPerSecond = 1148.0 +MPI Rank 2: 05/03/2016 18:02:14: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.3367s; samplesPerSecond = 1901.0 +MPI Rank 2: 05/03/2016 18:02:15: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.3485s; samplesPerSecond = 1836.3 +MPI Rank 2: 05/03/2016 18:02:15: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3842s; samplesPerSecond = 1665.7 +MPI Rank 2: 05/03/2016 18:02:16: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.3320s; samplesPerSecond = 1927.6 +MPI Rank 2: 05/03/2016 18:02:16: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.5274s; samplesPerSecond = 1213.6 +MPI Rank 2: 05/03/2016 18:02:16: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.3363s; samplesPerSecond = 1903.3 +MPI Rank 2: 05/03/2016 18:02:17: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3304s; samplesPerSecond = 1936.9 +MPI Rank 2: 05/03/2016 18:02:17: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.3572s; samplesPerSecond = 1791.6 +MPI Rank 2: 05/03/2016 18:02:17: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.3546s; samplesPerSecond = 1804.7 +MPI Rank 2: 05/03/2016 18:02:18: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.3459s; samplesPerSecond = 1850.4 +MPI Rank 2: 05/03/2016 18:02:18: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.5329s; samplesPerSecond = 1200.9 +MPI Rank 2: 05/03/2016 18:02:19: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.3320s; samplesPerSecond = 1927.8 +MPI Rank 2: 05/03/2016 18:02:19: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.3497s; samplesPerSecond = 1830.2 +MPI Rank 2: 05/03/2016 18:02:19: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3734s; samplesPerSecond = 1713.9 +MPI Rank 2: 05/03/2016 18:02:20: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3462s; samplesPerSecond = 1848.7 +MPI Rank 2: 05/03/2016 18:02:20: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.3215s; samplesPerSecond = 1990.5 +MPI Rank 2: 05/03/2016 18:02:21: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.5477s; samplesPerSecond = 1168.4 +MPI Rank 2: 05/03/2016 18:02:21: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=12.2164s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:29: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 18:02:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:29: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.054374 -MPI Rank 2: Async gradient aggregation wait time: 1.1e-05 -MPI Rank 2: Actual gradient aggregation time: 0.058776 -MPI Rank 2: 05/03/2016 18:04:29: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.11006760 * 2304; EvalErrorPrediction = 0.57161458 * 2304; time = 0.6954s; samplesPerSecond = 3313.4 -MPI Rank 2: Async gradient aggregation wait time: 1.1e-05 -MPI Rank 2: Actual gradient aggregation time: 0.080358 -MPI Rank 2: Async gradient aggregation wait time: 0.030056 -MPI Rank 2: Actual gradient aggregation time: 0.049554 -MPI Rank 2: 05/03/2016 18:04:30: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08344055 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 0.9450s; samplesPerSecond = 2709.1 -MPI Rank 2: Async gradient aggregation wait time: 8e-06 -MPI Rank 2: Actual gradient aggregation time: 0.027105 -MPI Rank 2: Async gradient aggregation wait time: 0.014686 -MPI Rank 2: Actual gradient aggregation time: 0.090145 -MPI Rank 2: 05/03/2016 18:04:31: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06587458 * 2560; EvalErrorPrediction = 0.56796875 * 2560; time = 0.7990s; samplesPerSecond = 3204.1 -MPI Rank 2: Async gradient aggregation wait time: 6e-06 -MPI Rank 2: Actual gradient aggregation time: 0.02671 -MPI Rank 2: Async gradient aggregation wait time: 1.4e-05 -MPI Rank 2: Actual gradient aggregation time: 0.097757 -MPI Rank 2: 05/03/2016 18:04:32: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10937064 * 2560; EvalErrorPrediction = 0.60859375 * 2560; time = 0.6730s; samplesPerSecond = 3804.1 -MPI Rank 2: Async gradient aggregation wait time: 0.184676 -MPI Rank 2: Actual gradient aggregation time: 0.048015 -MPI Rank 2: Async gradient aggregation wait time: 0.026121 -MPI Rank 2: Actual gradient aggregation time: 0.022691 -MPI Rank 2: 05/03/2016 18:04:32: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02788461 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.8535s; samplesPerSecond = 2999.4 +MPI Rank 2: 05/03/2016 18:02:21: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 2: Actual gradient aggregation time: 0.042156 +MPI Rank 2: Async gradient aggregation wait time: 0.025113 +MPI Rank 2: Actual gradient aggregation time: 0.038122 +MPI Rank 2: 05/03/2016 18:02:21: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.11006760 * 2304; EvalErrorPrediction = 0.57161458 * 2304; time = 0.5358s; samplesPerSecond = 4299.9 +MPI Rank 2: Async gradient aggregation wait time: 0.038493 +MPI Rank 2: Actual gradient aggregation time: 0.255541 MPI Rank 2: Async gradient aggregation wait time: 1e-05 -MPI Rank 2: Actual gradient aggregation time: 0.031044 -MPI Rank 2: Async gradient aggregation wait time: 0.028281 -MPI Rank 2: Actual gradient aggregation time: 0.05386 -MPI Rank 2: 05/03/2016 18:04:33: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.24576823 * 2560; EvalErrorPrediction = 0.60117188 * 2560; time = 0.7514s; samplesPerSecond = 3406.9 -MPI Rank 2: Async gradient aggregation wait time: 0.022477 -MPI Rank 2: Actual gradient aggregation time: 0.046285 -MPI Rank 2: Async gradient aggregation wait time: 0.010785 -MPI Rank 2: Actual gradient aggregation time: 0.140526 -MPI Rank 2: 05/03/2016 18:04:34: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.15226292 * 2560; EvalErrorPrediction = 0.58125000 * 2560; time = 0.9125s; samplesPerSecond = 2805.4 -MPI Rank 2: Async gradient aggregation wait time: 0.074981 -MPI Rank 2: Actual gradient aggregation time: 0.071114 -MPI Rank 2: Async gradient aggregation wait time: 0.053519 -MPI Rank 2: Actual gradient aggregation time: 0.052116 -MPI Rank 2: 05/03/2016 18:04:35: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.26731511 * 2560; EvalErrorPrediction = 0.62617188 * 2560; time = 0.7228s; samplesPerSecond = 3541.7 -MPI Rank 2: Async gradient aggregation wait time: 0.015266 -MPI Rank 2: Actual gradient aggregation time: 0.026603 -MPI Rank 2: 05/03/2016 18:04:35: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.13592086 * 20480; EvalErrorPrediction = 0.58808594 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.41129s +MPI Rank 2: Actual gradient aggregation time: 0.059359 +MPI Rank 2: 05/03/2016 18:02:22: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08344055 * 2560; EvalErrorPrediction = 0.57500000 * 2560; time = 0.9425s; samplesPerSecond = 2716.2 +MPI Rank 2: Async gradient aggregation wait time: 0.01174 +MPI Rank 2: Actual gradient aggregation time: 0.086586 +MPI Rank 2: Async gradient aggregation wait time: 1e-05 +MPI Rank 2: Actual gradient aggregation time: 0.036318 +MPI Rank 2: 05/03/2016 18:02:23: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06587458 * 2560; EvalErrorPrediction = 0.56796875 * 2560; time = 0.7727s; samplesPerSecond = 3312.9 +MPI Rank 2: Async gradient aggregation wait time: 0.051148 +MPI Rank 2: Actual gradient aggregation time: 0.058073 +MPI Rank 2: Async gradient aggregation wait time: 8e-06 +MPI Rank 2: Actual gradient aggregation time: 0.097245 +MPI Rank 2: 05/03/2016 18:02:24: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10937064 * 2560; EvalErrorPrediction = 0.60859375 * 2560; time = 0.9562s; samplesPerSecond = 2677.2 +MPI Rank 2: Async gradient aggregation wait time: 1e-05 +MPI Rank 2: Actual gradient aggregation time: 0.036292 +MPI Rank 2: Async gradient aggregation wait time: 9e-06 +MPI Rank 2: Actual gradient aggregation time: 0.043055 +MPI Rank 2: 05/03/2016 18:02:25: Epoch[ 2 of 
4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02788461 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.6419s; samplesPerSecond = 3988.3 +MPI Rank 2: Async gradient aggregation wait time: 0.0247 +MPI Rank 2: Actual gradient aggregation time: 0.058898 +MPI Rank 2: Async gradient aggregation wait time: 9e-06 +MPI Rank 2: Actual gradient aggregation time: 0.037588 +MPI Rank 2: 05/03/2016 18:02:25: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.24576823 * 2560; EvalErrorPrediction = 0.60117188 * 2560; time = 0.7543s; samplesPerSecond = 3393.8 +MPI Rank 2: Async gradient aggregation wait time: 0.027065 +MPI Rank 2: Actual gradient aggregation time: 0.057028 +MPI Rank 2: Async gradient aggregation wait time: 0.042924 +MPI Rank 2: Actual gradient aggregation time: 0.187069 +MPI Rank 2: 05/03/2016 18:02:26: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.15226292 * 2560; EvalErrorPrediction = 0.58125000 * 2560; time = 0.8435s; samplesPerSecond = 3035.0 +MPI Rank 2: Async gradient aggregation wait time: 0.00337 +MPI Rank 2: Actual gradient aggregation time: 0.100758 +MPI Rank 2: Async gradient aggregation wait time: 0.027693 +MPI Rank 2: Actual gradient aggregation time: 0.06752 +MPI Rank 2: 05/03/2016 18:02:27: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.26731511 * 2560; EvalErrorPrediction = 0.62617188 * 2560; time = 0.8399s; samplesPerSecond = 3047.9 +MPI Rank 2: Async gradient aggregation wait time: 0.022659 +MPI Rank 2: Actual gradient aggregation time: 0.046147 +MPI Rank 2: 05/03/2016 18:02:27: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.13592086 * 20480; EvalErrorPrediction = 0.58808594 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.36199s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:35: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 18:02:27: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:35: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 2: Async gradient aggregation wait time: 1.2e-05 -MPI Rank 2: Actual gradient aggregation time: 0.066016 -MPI Rank 2: Async gradient aggregation wait time: 0.010771 -MPI Rank 2: Actual gradient aggregation time: 0.184087 -MPI Rank 2: 05/03/2016 18:04:37: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.38080818 * 9216; EvalErrorPrediction = 0.66710069 * 9216; time = 1.7830s; samplesPerSecond = 5168.8 -MPI Rank 2: Async gradient aggregation wait time: 0.029405 -MPI Rank 2: Actual gradient aggregation time: 0.261151 -MPI Rank 2: Async gradient aggregation wait time: 0.010742 -MPI Rank 2: Actual gradient aggregation time: 0.185346 -MPI Rank 2: 05/03/2016 18:04:39: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.22297658 * 10240; EvalErrorPrediction = 0.60244141 * 10240; time = 2.1750s; samplesPerSecond = 4708.1 -MPI Rank 2: 05/03/2016 18:04:39: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.29018770 * 20480; EvalErrorPrediction = 0.62949219 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.26332s +MPI Rank 2: 05/03/2016 18:02:27: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Async gradient aggregation wait time: 0.022094 +MPI Rank 2: Actual gradient aggregation time: 0.35092 +MPI Rank 2: Async gradient aggregation wait time: 0.111076 +MPI Rank 2: Actual gradient aggregation time: 0.192606 +MPI Rank 2: 05/03/2016 18:02:29: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.38080818 * 9216; EvalErrorPrediction = 0.66710069 * 9216; time = 2.1430s; samplesPerSecond = 4300.5 +MPI Rank 2: Async gradient aggregation wait time: 0.008177 +MPI Rank 2: Actual gradient aggregation time: 0.267177 +MPI Rank 2: Async gradient aggregation wait time: 8e-06 +MPI Rank 2: Actual gradient aggregation time: 0.120335 +MPI Rank 2: 05/03/2016 18:02:32: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.22297658 * 10240; EvalErrorPrediction = 0.60244141 * 10240; time = 2.2761s; samplesPerSecond = 4499.0 +MPI Rank 2: 05/03/2016 18:02:32: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.29018770 * 20480; EvalErrorPrediction = 0.62949219 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.59655s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:39: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 18:02:32: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:39: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 2: Async gradient aggregation wait time: 1.2e-05 -MPI Rank 2: Actual gradient aggregation time: 0.250152 -MPI Rank 2: Async gradient aggregation wait time: 0.006464 -MPI Rank 2: Actual gradient aggregation time: 0.144527 -MPI Rank 2: 05/03/2016 18:04:42: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.06740633 * 9216; EvalErrorPrediction = 0.54676649 * 9216; time = 2.3829s; samplesPerSecond = 3867.6 -MPI Rank 2: Async gradient aggregation wait time: 0.088602 -MPI Rank 2: Actual gradient aggregation time: 0.189628 -MPI Rank 2: Async gradient aggregation wait time: 1.2e-05 -MPI Rank 2: Actual gradient aggregation time: 0.187879 -MPI Rank 2: 05/03/2016 18:04:44: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.03252134 * 10240; EvalErrorPrediction = 0.54667969 * 10240; time = 2.1011s; samplesPerSecond = 4873.7 -MPI Rank 2: Async gradient aggregation wait time: 5e-06 -MPI Rank 2: 05/03/2016 18:04:44: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 2.04741166 * 20480; EvalErrorPrediction = 0.54687500 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=4.54139s -MPI Rank 2: 05/03/2016 18:04:44: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 18:02:32: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Async gradient aggregation wait time: 1.8e-05 +MPI Rank 2: Actual gradient aggregation time: 0.150082 +MPI Rank 2: Async gradient aggregation wait time: 0.14546 +MPI Rank 2: Actual gradient aggregation time: 0.163546 +MPI Rank 2: 05/03/2016 18:02:34: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.06740633 * 9216; EvalErrorPrediction = 0.54676649 * 9216; time = 2.1318s; samplesPerSecond = 4323.2 +MPI Rank 2: Async gradient aggregation wait time: 0.123442 +MPI Rank 2: Actual gradient aggregation time: 0.165658 +MPI Rank 2: Async gradient aggregation wait time: 0.036316 +MPI Rank 2: Actual gradient aggregation time: 0.090788 +MPI Rank 2: 05/03/2016 18:02:36: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.03252134 * 10240; EvalErrorPrediction = 0.54667969 * 10240; time = 2.2110s; samplesPerSecond = 4631.3 +MPI Rank 2: Async gradient aggregation wait time: 0.05218 +MPI Rank 2: 05/03/2016 18:02:36: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 2.04741166 * 20480; EvalErrorPrediction = 0.54687500 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=4.43728s +MPI Rank 2: 05/03/2016 18:02:36: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:44: Action "train" complete. +MPI Rank 2: 05/03/2016 18:02:36: Action "train" complete. 
MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:44: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 18:02:36: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt index 938891d38..c33e66cef 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt @@ -1,4 +1,4 @@ -=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr ------------------------------------------------------------------- Build info: @@ -60,6 +60,13 @@ ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other +ping [requestnodes (after change)]: all 3 nodes responded +mpihelper: we are cog 2 in a gearbox of 3 +ping [mpihelper]: 3 nodes pinging each other +ping [mpihelper]: all 3 nodes responded +ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded @@ -73,18 +80,11 @@ ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 1 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 2 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: all 3 nodes responded -05/03/2016 18:04:44: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank0 -05/03/2016 18:04:45: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank1 -05/03/2016 18:04:45: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank2 +05/03/2016 18:02:37: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank0 +05/03/2016 18:02:37: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank1 +05/03/2016 18:02:38: Redirecting stderr to file /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank2 -------------------------------------------------------------------------- -mpiexec has exited due to process rank 0 with PID 12394 on +mpiexec has exited due to process rank 0 with PID 3745 on node 87698aadbc9d 
exiting improperly. There are three reasons this could occur: 1. this process did not call "init" before exiting, but others in @@ -107,32 +107,32 @@ terminated by signals sent by mpiexec (as reported here). You can avoid this message by specifying -quiet on the mpiexec command line. -------------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:04:44: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:04:44: Build info: +MPI Rank 0: 05/03/2016 18:02:37: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:02:37: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: Built time: May 3 2016 17:56:15 -MPI Rank 0: 05/03/2016 18:04:44: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 0: 05/03/2016 18:04:44: Build type: release -MPI Rank 0: 05/03/2016 18:04:44: Build target: GPU -MPI Rank 0: 05/03/2016 18:04:44: With 1bit-SGD: yes -MPI Rank 0: 05/03/2016 18:04:44: Math lib: acml -MPI Rank 0: 05/03/2016 18:04:44: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 0: 05/03/2016 18:04:44: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 0: 05/03/2016 18:04:44: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 0: 05/03/2016 18:04:44: Build Branch: HEAD -MPI Rank 0: 05/03/2016 18:04:44: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 0: 05/03/2016 18:04:44: Built by philly on 87698aadbc9d -MPI Rank 0: 05/03/2016 18:04:44: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 0: 05/03/2016 18:04:44: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:02:37: Built time: May 3 2016 17:56:15 +MPI Rank 0: 05/03/2016 18:02:37: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 0: 05/03/2016 18:02:37: Build type: release +MPI Rank 0: 05/03/2016 18:02:37: Build target: GPU +MPI Rank 0: 05/03/2016 18:02:37: With 1bit-SGD: yes +MPI Rank 0: 05/03/2016 18:02:37: Math lib: acml +MPI Rank 0: 05/03/2016 18:02:37: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 0: 05/03/2016 18:02:37: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 0: 05/03/2016 18:02:37: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 0: 05/03/2016 18:02:37: Build Branch: HEAD +MPI Rank 0: 05/03/2016 18:02:37: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 0: 05/03/2016 18:02:37: Built by philly on 87698aadbc9d +MPI Rank 0: 05/03/2016 18:02:37: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 0: 05/03/2016 18:02:37: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: Running on localhost at 2016/05/03 18:04:44 -MPI Rank 0: 05/03/2016 18:04:44: Command line: -MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN 
OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: 05/03/2016 18:02:37: Running on localhost at 2016/05/03 18:02:37 +MPI Rank 0: 05/03/2016 18:02:37: Command line: +MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:04:44: precision = "float" +MPI Rank 0: 05/03/2016 18:02:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:02:37: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -222,14 +222,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: 
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. +MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -237,18 +235,18 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:02:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:04:44: precision = "float" +MPI Rank 0: 05/03/2016 18:02:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:02:37: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = 0 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -332,14 +330,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 0: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -347,24 +343,24 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:02:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:02:37: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN +MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
MPI Rank 0: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:deviceId=0 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -446,35 +442,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 18:04:44: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 18:04:44: Commands: speechTrain -MPI Rank 0: 05/03/2016 18:04:44: Precision = "double" -MPI Rank 0: 05/03/2016 18:04:44: Using 8 CPU threads. 
-MPI Rank 0: 05/03/2016 18:04:44: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 18:04:44: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 0: 05/03/2016 18:04:44: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 0: 05/03/2016 18:02:37: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:02:37: Commands: speechTrain +MPI Rank 0: 05/03/2016 18:02:37: Precision = "double" +MPI Rank 0: 05/03/2016 18:02:37: Using 8 CPU threads. +MPI Rank 0: 05/03/2016 18:02:37: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 18:02:37: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 0: 05/03/2016 18:02:37: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: ############################################################################## -MPI Rank 0: 05/03/2016 18:04:44: # # -MPI Rank 0: 05/03/2016 18:04:44: # Action "train" # -MPI Rank 0: 05/03/2016 18:04:44: # # -MPI Rank 0: 05/03/2016 18:04:44: ############################################################################## +MPI Rank 0: 05/03/2016 18:02:37: ############################################################################## +MPI Rank 0: 05/03/2016 18:02:37: # # +MPI Rank 0: 05/03/2016 18:02:37: # Action "train" # +MPI Rank 0: 05/03/2016 18:02:37: # # +MPI Rank 0: 05/03/2016 18:02:37: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 18:02:37: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 0: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:44: Creating virgin network. +MPI Rank 0: 05/03/2016 18:02:37: Creating virgin network. MPI Rank 0: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 0: MPI Rank 0: Post-processing network... @@ -527,14 +522,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:45: Created model with 25 nodes on GPU 0. +MPI Rank 0: 05/03/2016 18:02:37: Created model with 25 nodes on GPU 0. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:45: Training criterion node(s): -MPI Rank 0: 05/03/2016 18:04:45: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 18:02:37: Training criterion node(s): +MPI Rank 0: 05/03/2016 18:02:37: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:45: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 18:02:37: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:45: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 18:02:37: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -542,193 +537,199 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0x2be5f08: {[features Value[363 x *]] } -MPI Rank 0: 0x2c00dd8: {[B0 Value[512 x 1]] } -MPI Rank 0: 0x35653a8: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 0x35658b8: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0x3566628: {[W0 Value[512 x 363]] } -MPI Rank 0: 0x3a94da8: {[W1 Value[512 x 512]] } -MPI Rank 0: 0x3a95b78: {[B1 Value[512 x 1]] } -MPI Rank 0: 0x3a96d18: {[W2 Value[132 x 512]] } -MPI Rank 0: 0x3a979c8: {[B2 Value[132 x 1]] } -MPI Rank 0: 0x3a987f8: {[labels Value[132 x *]] } -MPI Rank 0: 0x3a99a58: {[Prior Value[132]] } -MPI Rank 0: 0x3a9f2f8: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0x3a9f5f8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0x3a9f7b8: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0x3a9fc48: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0x3a9fdb8: {[LogOfPrior Value[132]] } -MPI Rank 0: 0x3d80a38: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0x3d811f8: {[W0*features Value[512 x *]] } -MPI Rank 0: 0x3d81408: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0x3d81568: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0x3d816c8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 0x3d81888: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0x3d81a48: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0x3d81c08: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0x3d82768: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0x3d82928: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0x3d82ae8: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0x3d82ca8: {[B2 Gradient[132 x 1]] } +MPI Rank 0: 0x25594f8: {[features Value[363 x *]] } +MPI Rank 0: 0x2d583a8: {[W0 Value[512 x 363]] } +MPI Rank 0: 0x3197468: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 0x3197978: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 0x334f538: {[W1 Value[512 x 512]] } +MPI Rank 0: 0x3350308: {[B1 Value[512 x 1]] } +MPI Rank 0: 0x33514a8: {[W2 Value[132 x 512]] } +MPI Rank 0: 0x3352158: {[B2 Value[132 x 
1]] } +MPI Rank 0: 0x3352f88: {[labels Value[132 x *]] } +MPI Rank 0: 0x33541e8: {[Prior Value[132]] } +MPI Rank 0: 0x3359a88: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 0x3359d88: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 0x3359f48: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 0x335a3d8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 0x335a548: {[LogOfPrior Value[132]] } +MPI Rank 0: 0x335fb48: {[B0 Value[512 x 1]] } +MPI Rank 0: 0x363b1c8: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 0x363b988: {[W0*features Value[512 x *]] } +MPI Rank 0: 0x363bb98: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 0x363bcf8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 0x363be58: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 0x363c018: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 0x363c1d8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 0x363c398: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 0x363cef8: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 0x363d0b8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 0x363d278: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 0x363d438: {[B2 Gradient[132 x 1]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:45: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 18:02:37: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:45: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 18:04:45: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 18:04:45: Prior = Mean() +MPI Rank 0: 05/03/2016 18:02:37: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 18:02:37: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 18:02:37: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:48: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 18:02:40: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:48: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 18:02:41: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:49: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.1927s; samplesPerSecond = 3321.3 -MPI Rank 0: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0956s; samplesPerSecond = 6697.2 -MPI Rank 0: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6689.0 -MPI Rank 0: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0955s; samplesPerSecond = 6698.3 -MPI Rank 0: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0955s; samplesPerSecond = 6703.9 -MPI Rank 0: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0955s; samplesPerSecond = 6701.9 -MPI Rank 0: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0955s; samplesPerSecond = 6704.2 -MPI Rank 0: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0955s; samplesPerSecond = 6701.4 -MPI Rank 0: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0955s; samplesPerSecond = 6702.3 -MPI Rank 0: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0954s; samplesPerSecond = 6706.8 -MPI Rank 0: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0956s; samplesPerSecond = 6696.0 -MPI Rank 0: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0955s; samplesPerSecond = 6698.6 -MPI Rank 0: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0955s; samplesPerSecond = 6704.4 -MPI Rank 0: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0955s; samplesPerSecond = 6701.4 -MPI Rank 0: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0955s; samplesPerSecond = 6702.2 -MPI Rank 0: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0954s; samplesPerSecond = 6705.9 -MPI Rank 0: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0956s; samplesPerSecond = 6697.5 -MPI Rank 0: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0955s; samplesPerSecond = 6701.4 -MPI Rank 0: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0954s; samplesPerSecond = 6707.3 -MPI Rank 0: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0954s; samplesPerSecond = 6710.4 -MPI Rank 0: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0955s; samplesPerSecond = 6698.3 -MPI Rank 0: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0955s; samplesPerSecond = 6701.1 -MPI Rank 0: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0956s; samplesPerSecond = 6695.1 -MPI Rank 0: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0956s; samplesPerSecond = 6694.7 -MPI Rank 0: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0956s; samplesPerSecond = 6692.4 -MPI Rank 0: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0957s; samplesPerSecond = 6690.1 -MPI Rank 0: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0957s; samplesPerSecond = 6686.1 -MPI Rank 0: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0956s; samplesPerSecond = 6692.5 -MPI Rank 0: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0956s; samplesPerSecond = 6697.2 -MPI Rank 0: 05/03/2016 18:04:52: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0953s; samplesPerSecond = 6715.8 -MPI Rank 0: 05/03/2016 18:04:52: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0954s; samplesPerSecond = 6710.4 -MPI Rank 0: 05/03/2016 18:04:52: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0777s; samplesPerSecond = 8234.2 -MPI Rank 0: 05/03/2016 18:04:52: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.18338s -MPI Rank 0: 05/03/2016 18:04:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 18:02:41: Starting minibatch loop. 
+MPI Rank 0: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.0972s; samplesPerSecond = 6587.0 +MPI Rank 0: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0957s; samplesPerSecond = 6684.4 +MPI Rank 0: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0958s; samplesPerSecond = 6683.9 +MPI Rank 0: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0958s; samplesPerSecond = 6683.9 +MPI Rank 0: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0957s; samplesPerSecond = 6686.7 +MPI Rank 0: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0956s; samplesPerSecond = 6692.5 +MPI Rank 0: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0956s; samplesPerSecond = 6692.4 +MPI Rank 0: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0956s; samplesPerSecond = 6691.1 +MPI Rank 0: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0957s; samplesPerSecond = 6690.2 +MPI Rank 0: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0957s; samplesPerSecond = 6686.8 +MPI Rank 0: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0957s; samplesPerSecond = 6686.7 +MPI Rank 0: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0957s; samplesPerSecond = 6685.3 +MPI Rank 0: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0957s; samplesPerSecond = 6686.2 +MPI Rank 0: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0957s; samplesPerSecond = 6688.2 +MPI Rank 0: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0957s; samplesPerSecond = 6688.2 +MPI Rank 0: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0957s; samplesPerSecond = 6689.9 +MPI Rank 0: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0957s; samplesPerSecond = 6689.6 +MPI Rank 0: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0957s; samplesPerSecond = 6688.5 +MPI Rank 0: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0957s; samplesPerSecond = 6689.7 +MPI Rank 0: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0959s; samplesPerSecond = 6675.5 +MPI Rank 0: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0957s; samplesPerSecond = 6689.8 +MPI Rank 0: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0958s; samplesPerSecond = 6682.5 +MPI Rank 0: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0957s; samplesPerSecond = 6687.7 +MPI Rank 0: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0957s; samplesPerSecond = 6685.5 +MPI Rank 0: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0957s; samplesPerSecond = 6686.7 +MPI Rank 0: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0957s; samplesPerSecond = 6689.5 +MPI Rank 0: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0957s; samplesPerSecond = 6685.3 +MPI Rank 0: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0956s; samplesPerSecond = 6692.4 +MPI Rank 0: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0957s; samplesPerSecond = 6690.7 +MPI Rank 0: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0956s; samplesPerSecond = 6693.7 +MPI Rank 0: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0957s; samplesPerSecond = 6690.8 +MPI Rank 0: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0957s; samplesPerSecond = 6688.5 +MPI Rank 0: 05/03/2016 18:02:44: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.0674s +MPI Rank 0: 05/03/2016 18:02:44: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:52: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 
momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 18:02:44: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:52: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.009254 -MPI Rank 0: Async gradient aggregation wait time: 0.00662 -MPI Rank 0: Actual gradient aggregation time: 0.014059 -MPI Rank 0: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10515053 * 2304; EvalErrorPrediction = 0.56770833 * 2304; time = 0.1414s; samplesPerSecond = 16294.2 -MPI Rank 0: Async gradient aggregation wait time: 0.005286 -MPI Rank 0: Actual gradient aggregation time: 0.012342 -MPI Rank 0: Async gradient aggregation wait time: 0.001836 -MPI Rank 0: Actual gradient aggregation time: 0.012456 -MPI Rank 0: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.07710528 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.1323s; samplesPerSecond = 19351.4 -MPI Rank 0: Async gradient aggregation wait time: 0.00187 -MPI Rank 0: Actual gradient aggregation time: 0.012073 -MPI Rank 0: Async gradient aggregation wait time: 0.002203 -MPI Rank 0: Actual gradient aggregation time: 0.012322 -MPI Rank 0: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06009947 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1320s; samplesPerSecond = 19388.9 -MPI Rank 0: Async gradient aggregation wait time: 0.002184 -MPI Rank 0: Actual gradient aggregation time: 0.012265 -MPI Rank 0: Async gradient aggregation wait time: 0.001892 -MPI Rank 0: Actual gradient aggregation time: 0.012251 -MPI Rank 0: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.09985912 * 2560; EvalErrorPrediction = 0.60898438 * 2560; time = 0.1301s; samplesPerSecond = 19682.9 -MPI Rank 0: Async gradient aggregation wait time: 0.000995 -MPI Rank 0: Actual gradient aggregation time: 0.0126 -MPI Rank 0: Async gradient aggregation wait time: 0.002218 -MPI Rank 0: Actual gradient aggregation time: 0.012285 -MPI Rank 0: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02133028 * 2560; EvalErrorPrediction = 0.56875000 * 2560; time = 0.1295s; samplesPerSecond = 19767.3 -MPI Rank 0: Async gradient aggregation wait time: 0.002182 -MPI Rank 0: Actual gradient aggregation time: 0.012281 -MPI Rank 0: Async gradient aggregation wait time: 0.00222 -MPI Rank 0: Actual gradient aggregation time: 0.012241 -MPI Rank 0: 05/03/2016 18:04:53: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.23836126 * 2560; EvalErrorPrediction = 0.59023437 * 2560; time = 0.1303s; samplesPerSecond = 19652.7 -MPI Rank 0: Async gradient aggregation wait time: 0.001346 -MPI Rank 0: Actual gradient aggregation time: 0.01241 -MPI Rank 0: Async gradient aggregation wait time: 0.002222 -MPI Rank 0: Actual gradient aggregation time: 0.012249 -MPI Rank 0: 05/03/2016 18:04:53: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.16973375 * 2560; EvalErrorPrediction = 0.58632812 * 2560; time = 0.1294s; samplesPerSecond = 
19778.9 -MPI Rank 0: Async gradient aggregation wait time: 0.001875 -MPI Rank 0: Actual gradient aggregation time: 0.012263 -MPI Rank 0: Async gradient aggregation wait time: 0.000998 -MPI Rank 0: Actual gradient aggregation time: 0.012548 -MPI Rank 0: 05/03/2016 18:04:53: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18375111 * 2560; EvalErrorPrediction = 0.60312500 * 2560; time = 0.1302s; samplesPerSecond = 19662.8 -MPI Rank 0: Async gradient aggregation wait time: 0.003505 -MPI Rank 0: Actual gradient aggregation time: 0.002575 -MPI Rank 0: 05/03/2016 18:04:53: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.12191186 * 20480; EvalErrorPrediction = 0.58330078 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.06459s -MPI Rank 0: 05/03/2016 18:04:53: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.2' +MPI Rank 0: 05/03/2016 18:02:44: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Actual gradient aggregation time: 0.010675 +MPI Rank 0: Async gradient aggregation wait time: 0.008703 +MPI Rank 0: Actual gradient aggregation time: 0.014885 +MPI Rank 0: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10515053 * 2304; EvalErrorPrediction = 0.56770833 * 2304; time = 0.1356s; samplesPerSecond = 16985.1 +MPI Rank 0: Async gradient aggregation wait time: 0.007204 +MPI Rank 0: Actual gradient aggregation time: 0.012707 +MPI Rank 0: Async gradient aggregation wait time: 0.001554 +MPI Rank 0: Actual gradient aggregation time: 0.012739 +MPI Rank 0: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.07710528 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.1326s; samplesPerSecond = 19309.8 +MPI Rank 0: Async gradient aggregation wait time: 0.001146 +MPI Rank 0: Actual gradient aggregation time: 0.011897 +MPI Rank 0: Async gradient aggregation wait time: 0.001853 +MPI Rank 0: Actual gradient aggregation time: 0.011355 +MPI Rank 0: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06009947 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1308s; samplesPerSecond = 19574.1 +MPI Rank 0: Async gradient aggregation wait time: 0.001707 +MPI Rank 0: Actual gradient aggregation time: 0.011351 +MPI Rank 0: Async gradient aggregation wait time: 0.001153 +MPI Rank 0: Actual gradient aggregation time: 0.012555 +MPI Rank 0: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.09985912 * 2560; EvalErrorPrediction = 0.60898438 * 2560; time = 0.1289s; samplesPerSecond = 19867.3 +MPI Rank 0: Async gradient aggregation wait time: 0.001791 +MPI Rank 0: Actual gradient aggregation time: 0.012644 +MPI Rank 0: Async gradient aggregation wait time: 0.001864 +MPI Rank 0: Actual gradient aggregation time: 0.011375 +MPI Rank 0: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02133028 * 2560; EvalErrorPrediction = 0.56875000 * 2560; time = 0.1293s; samplesPerSecond = 19802.4 +MPI Rank 0: Async gradient aggregation wait time: 0.001787 +MPI Rank 0: Actual gradient aggregation time: 0.012652 +MPI Rank 0: Async gradient aggregation wait time: 0.001174 +MPI Rank 0: Actual 
gradient aggregation time: 0.012685 +MPI Rank 0: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.23836126 * 2560; EvalErrorPrediction = 0.59023437 * 2560; time = 0.1290s; samplesPerSecond = 19841.1 +MPI Rank 0: Async gradient aggregation wait time: 0.001732 +MPI Rank 0: Actual gradient aggregation time: 0.011326 +MPI Rank 0: Async gradient aggregation wait time: 0.001 +MPI Rank 0: Actual gradient aggregation time: 0.01258 +MPI Rank 0: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.16973375 * 2560; EvalErrorPrediction = 0.58632812 * 2560; time = 0.1284s; samplesPerSecond = 19942.0 +MPI Rank 0: Async gradient aggregation wait time: 0.001878 +MPI Rank 0: Actual gradient aggregation time: 0.011341 +MPI Rank 0: Async gradient aggregation wait time: 0.000984 +MPI Rank 0: Actual gradient aggregation time: 0.012934 +MPI Rank 0: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18375111 * 2560; EvalErrorPrediction = 0.60312500 * 2560; time = 0.1296s; samplesPerSecond = 19752.8 +MPI Rank 0: Async gradient aggregation wait time: 0.003624 +MPI Rank 0: Actual gradient aggregation time: 0.002579 +MPI Rank 0: 05/03/2016 18:02:45: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.12191186 * 20480; EvalErrorPrediction = 0.58330078 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.05384s +MPI Rank 0: 05/03/2016 18:02:45: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 18:02:45: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 0: Async gradient aggregation wait time: 0.005561 -MPI Rank 0: Actual gradient aggregation time: 0.025442 -MPI Rank 0: Async gradient aggregation wait time: 0.001876 -MPI Rank 0: Actual gradient aggregation time: 0.026315 -MPI Rank 0: 05/03/2016 18:04:53: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.36016346 * 9216; EvalErrorPrediction = 0.67154948 * 9216; time = 0.2725s; samplesPerSecond = 33822.4 -MPI Rank 0: Async gradient aggregation wait time: 0.005524 -MPI Rank 0: Actual gradient aggregation time: 0.025069 -MPI Rank 0: Async gradient aggregation wait time: 0.003595 -MPI Rank 0: Actual gradient aggregation time: 0.033527 -MPI Rank 0: 05/03/2016 18:04:53: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.16580543 * 10240; EvalErrorPrediction = 0.60068359 * 10240; time = 0.2698s; samplesPerSecond = 37954.2 -MPI Rank 0: 05/03/2016 18:04:53: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.24959638 * 20480; EvalErrorPrediction = 0.63022461 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.552222s -MPI Rank 0: 05/03/2016 18:04:53: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.3' +MPI Rank 0: 05/03/2016 18:02:45: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Async gradient aggregation wait time: 0.001734 +MPI Rank 0: Actual gradient aggregation time: 0.024806 +MPI Rank 0: Async gradient aggregation wait time: 0.00175 +MPI Rank 0: Actual gradient aggregation time: 0.024612 +MPI Rank 0: 05/03/2016 18:02:45: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.36016346 * 9216; EvalErrorPrediction = 0.67154948 * 9216; time = 0.2718s; samplesPerSecond = 33910.0 +MPI Rank 0: Async gradient aggregation wait time: 0.005154 +MPI Rank 0: Actual gradient aggregation time: 0.025405 +MPI Rank 0: Async gradient aggregation wait time: 0.003036 +MPI Rank 0: Actual gradient aggregation time: 0.032195 +MPI Rank 0: 05/03/2016 18:02:46: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.16580543 * 10240; EvalErrorPrediction = 0.60068359 * 10240; time = 0.2730s; samplesPerSecond = 37503.9 +MPI Rank 0: 05/03/2016 18:02:46: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.24959638 * 20480; EvalErrorPrediction = 0.63022461 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.55522s +MPI Rank 0: 05/03/2016 18:02:46: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.3' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:53: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 18:02:46: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 0: Async gradient aggregation wait time: 0.002039 -MPI Rank 0: Actual gradient aggregation time: 0.027137 -MPI Rank 0: Async gradient aggregation wait time: 0.004475 -MPI Rank 0: Actual gradient aggregation time: 0.029132 -MPI Rank 0: 05/03/2016 18:04:54: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.00308410 * 9216; EvalErrorPrediction = 0.54079861 * 9216; time = 0.2662s; samplesPerSecond = 34617.1 -MPI Rank 0: Async gradient aggregation wait time: 0.001833 -MPI Rank 0: Actual gradient aggregation time: 0.026565 -MPI Rank 0: Async gradient aggregation wait time: 0.01249 -MPI Rank 0: Actual gradient aggregation time: 0.025106 -MPI Rank 0: 05/03/2016 18:04:54: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96679954 * 10240; EvalErrorPrediction = 0.54326172 * 10240; time = 0.2696s; samplesPerSecond = 37985.2 -MPI Rank 0: Async gradient aggregation wait time: 0.003406 -MPI Rank 0: 05/03/2016 18:04:54: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.98299829 * 20480; EvalErrorPrediction = 0.54199219 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=0.54653s -MPI Rank 0: 05/03/2016 18:04:54: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 18:04:54: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 18:02:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Async gradient aggregation wait time: 0.001838 +MPI Rank 0: Actual gradient aggregation time: 0.027175 +MPI Rank 0: Async gradient aggregation wait time: 0.003847 +MPI Rank 0: Actual gradient aggregation time: 0.030303 +MPI Rank 0: 05/03/2016 18:02:46: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.00308410 * 9216; EvalErrorPrediction = 0.54079861 * 9216; time = 0.2665s; samplesPerSecond = 34576.3 +MPI Rank 0: Async gradient aggregation wait time: 0.001811 +MPI Rank 0: Actual gradient aggregation time: 0.025725 +MPI Rank 0: Async gradient aggregation wait time: 0.011259 +MPI Rank 0: Actual gradient aggregation time: 0.022804 +MPI Rank 0: 05/03/2016 18:02:46: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96679954 * 10240; EvalErrorPrediction = 0.54326172 * 10240; time = 0.2685s; samplesPerSecond = 38130.8 +MPI Rank 0: Async gradient aggregation wait time: 0.003564 +MPI Rank 0: 05/03/2016 18:02:46: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.98299829 * 20480; EvalErrorPrediction = 0.54199219 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=0.545418s +MPI Rank 0: 05/03/2016 18:02:46: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 18:02:46: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:54: Action "train" complete. +MPI Rank 0: 05/03/2016 18:02:46: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 18:04:54: __COMPLETED__ -MPI Rank 1: 05/03/2016 18:04:45: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 18:04:45: Build info: +MPI Rank 0: 05/03/2016 18:02:46: __COMPLETED__ +MPI Rank 1: 05/03/2016 18:02:37: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:02:37: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: Built time: May 3 2016 17:56:15 -MPI Rank 1: 05/03/2016 18:04:45: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 1: 05/03/2016 18:04:45: Build type: release -MPI Rank 1: 05/03/2016 18:04:45: Build target: GPU -MPI Rank 1: 05/03/2016 18:04:45: With 1bit-SGD: yes -MPI Rank 1: 05/03/2016 18:04:45: Math lib: acml -MPI Rank 1: 05/03/2016 18:04:45: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 1: 05/03/2016 18:04:45: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 1: 05/03/2016 18:04:45: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 1: 05/03/2016 18:04:45: Build Branch: HEAD -MPI Rank 1: 05/03/2016 18:04:45: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 1: 05/03/2016 18:04:45: Built by philly on 87698aadbc9d -MPI Rank 1: 05/03/2016 18:04:45: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 1: 05/03/2016 18:04:45: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:02:37: Built time: May 3 2016 17:56:15 +MPI Rank 1: 05/03/2016 18:02:37: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 1: 05/03/2016 18:02:37: Build type: release +MPI Rank 1: 05/03/2016 18:02:37: Build target: GPU +MPI Rank 1: 05/03/2016 18:02:37: With 1bit-SGD: yes +MPI Rank 1: 05/03/2016 18:02:37: Math lib: acml +MPI Rank 1: 05/03/2016 18:02:37: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 1: 05/03/2016 18:02:37: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 1: 05/03/2016 18:02:37: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 1: 05/03/2016 18:02:37: Build Branch: HEAD +MPI Rank 1: 05/03/2016 18:02:37: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 1: 05/03/2016 18:02:37: Built by philly on 87698aadbc9d +MPI Rank 1: 05/03/2016 18:02:37: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 1: 05/03/2016 18:02:37: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: Running on localhost at 2016/05/03 18:04:45 -MPI Rank 1: 05/03/2016 18:04:45: Command line: -MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double 
speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: 05/03/2016 18:02:37: Running on localhost at 2016/05/03 18:02:37 +MPI Rank 1: 05/03/2016 18:02:37: Command line: +MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:04:45: precision = "float" +MPI Rank 1: 05/03/2016 18:02:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:02:37: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -818,14 +819,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -833,18 +832,18 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:02:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:04:45: precision = "float" +MPI Rank 1: 05/03/2016 18:02:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:02:37: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = 0 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -928,14 +927,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -943,24 +940,24 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:02:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:02:37: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN +MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
MPI Rank 1: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:deviceId=0 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -1042,35 +1039,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 18:04:45: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 18:04:45: Commands: speechTrain -MPI Rank 1: 05/03/2016 18:04:45: Precision = "double" -MPI Rank 1: 05/03/2016 18:04:45: Using 8 CPU threads. 
-MPI Rank 1: 05/03/2016 18:04:45: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 18:04:45: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 1: 05/03/2016 18:04:45: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 1: 05/03/2016 18:02:37: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:02:37: Commands: speechTrain +MPI Rank 1: 05/03/2016 18:02:37: Precision = "double" +MPI Rank 1: 05/03/2016 18:02:37: Using 8 CPU threads. +MPI Rank 1: 05/03/2016 18:02:37: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 18:02:37: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 1: 05/03/2016 18:02:37: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: ############################################################################## -MPI Rank 1: 05/03/2016 18:04:45: # # -MPI Rank 1: 05/03/2016 18:04:45: # Action "train" # -MPI Rank 1: 05/03/2016 18:04:45: # # -MPI Rank 1: 05/03/2016 18:04:45: ############################################################################## +MPI Rank 1: 05/03/2016 18:02:37: ############################################################################## +MPI Rank 1: 05/03/2016 18:02:37: # # +MPI Rank 1: 05/03/2016 18:02:37: # Action "train" # +MPI Rank 1: 05/03/2016 18:02:37: # # +MPI Rank 1: 05/03/2016 18:02:37: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 18:02:37: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 1: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: Creating virgin network. +MPI Rank 1: 05/03/2016 18:02:37: Creating virgin network. MPI Rank 1: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 1: MPI Rank 1: Post-processing network... @@ -1123,14 +1119,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. 
MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: Created model with 25 nodes on GPU 0. +MPI Rank 1: 05/03/2016 18:02:38: Created model with 25 nodes on GPU 0. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: Training criterion node(s): -MPI Rank 1: 05/03/2016 18:04:45: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 18:02:38: Training criterion node(s): +MPI Rank 1: 05/03/2016 18:02:38: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 18:02:38: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 18:02:38: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1138,189 +1134,195 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 0x2a9f048: {[features Value[363 x *]] } -MPI Rank 1: 0x2e64d18: {[W0 Value[512 x 363]] } -MPI Rank 1: 0x2e74088: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 0x2e74598: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 0x37bf318: {[B0 Value[512 x 1]] } -MPI Rank 1: 0x37c1488: {[W1 Value[512 x 512]] } -MPI Rank 1: 0x3b75a48: {[B1 Value[512 x 1]] } -MPI Rank 1: 0x3b76be8: {[W2 Value[132 x 512]] } -MPI Rank 1: 0x3b77898: {[B2 Value[132 x 1]] } -MPI Rank 1: 0x3b786c8: {[labels Value[132 x *]] } -MPI Rank 1: 0x3b79928: {[Prior Value[132]] } -MPI Rank 1: 0x3b7f358: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 0x3b7f4b8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 0x3b7f678: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 0x3b7fb08: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 0x3b7fc38: {[LogOfPrior Value[132]] } -MPI Rank 1: 0x3b81398: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 0x3b81b58: {[W0*features Value[512 x *]] } -MPI Rank 1: 0x3b81d68: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 0x3b81ec8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 0x3b82088: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 0x3b82248: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 0x3b82408: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 0x3b825c8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 0x3b83128: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 0x3b832e8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 0x3b834a8: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 0x3b83668: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 0x2570ad8: {[features Value[363 x *]] } +MPI Rank 1: 0x325e1e8: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 0x325e6b8: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 0x325f3b8: {[W0 Value[512 x 363]] } +MPI Rank 1: 0x3353178: {[W1 Value[512 x 512]] } +MPI Rank 1: 0x3353f48: {[B1 Value[512 x 1]] } +MPI Rank 1: 0x33550e8: {[W2 Value[132 x 512]] } +MPI Rank 1: 0x3355d98: {[B2 Value[132 
x 1]] } +MPI Rank 1: 0x3356bc8: {[labels Value[132 x *]] } +MPI Rank 1: 0x3357e28: {[Prior Value[132]] } +MPI Rank 1: 0x335d6c8: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 0x335d9c8: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 0x335db88: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 0x335e018: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 0x335e188: {[LogOfPrior Value[132]] } +MPI Rank 1: 0x3363788: {[B0 Value[512 x 1]] } +MPI Rank 1: 0x363ee68: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 0x363f628: {[W0*features Value[512 x *]] } +MPI Rank 1: 0x363f838: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 0x363f998: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 0x363faf8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 0x363fcb8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 0x363fe78: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 0x3640038: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 0x3640b98: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 0x3640d58: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 0x3640f18: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 0x36410d8: {[B2 Gradient[132 x 1]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 18:02:38: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:45: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 18:04:45: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 18:04:45: Prior = Mean() +MPI Rank 1: 05/03/2016 18:02:38: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 18:02:38: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 18:02:38: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:48: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 18:02:41: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:48: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 18:02:41: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:49: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.1408s; samplesPerSecond = 4546.6 -MPI Rank 1: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0956s; samplesPerSecond = 6695.1 -MPI Rank 1: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6690.3 -MPI Rank 1: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0956s; samplesPerSecond = 6693.0 -MPI Rank 1: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0955s; samplesPerSecond = 6702.6 -MPI Rank 1: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0955s; samplesPerSecond = 6701.4 -MPI Rank 1: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0955s; samplesPerSecond = 6702.8 -MPI Rank 1: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0955s; samplesPerSecond = 6702.2 -MPI Rank 1: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0955s; samplesPerSecond = 6701.6 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0954s; samplesPerSecond = 6705.4 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0955s; samplesPerSecond = 6701.3 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0956s; samplesPerSecond = 6693.7 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0955s; samplesPerSecond = 6702.7 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0955s; samplesPerSecond = 6703.7 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0955s; samplesPerSecond = 6700.1 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0955s; samplesPerSecond = 6703.5 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0954s; samplesPerSecond = 6706.6 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0955s; samplesPerSecond = 6700.4 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0955s; samplesPerSecond = 6702.2 -MPI Rank 1: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0953s; samplesPerSecond = 6712.5 -MPI Rank 1: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0955s; samplesPerSecond = 6701.3 -MPI Rank 1: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0955s; samplesPerSecond = 6700.3 -MPI Rank 1: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0955s; samplesPerSecond = 6699.0 -MPI Rank 1: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0956s; samplesPerSecond = 6697.6 -MPI Rank 1: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0957s; samplesPerSecond = 6687.6 -MPI Rank 1: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0956s; samplesPerSecond = 6692.6 -MPI Rank 1: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0957s; samplesPerSecond = 6689.1 -MPI Rank 1: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0957s; samplesPerSecond = 6687.4 -MPI Rank 1: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0956s; samplesPerSecond = 6694.7 -MPI Rank 1: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0954s; samplesPerSecond = 6706.8 -MPI Rank 1: 05/03/2016 18:04:52: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0953s; samplesPerSecond = 6715.7 -MPI Rank 1: 05/03/2016 18:04:52: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0955s; samplesPerSecond = 6702.7 -MPI Rank 1: 05/03/2016 18:04:52: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.14474s +MPI Rank 1: 05/03/2016 18:02:41: Starting minibatch loop. 
+MPI Rank 1: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.0966s; samplesPerSecond = 6628.3 +MPI Rank 1: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0958s; samplesPerSecond = 6683.8 +MPI Rank 1: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0958s; samplesPerSecond = 6683.8 +MPI Rank 1: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0957s; samplesPerSecond = 6684.4 +MPI Rank 1: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0957s; samplesPerSecond = 6687.1 +MPI Rank 1: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0956s; samplesPerSecond = 6693.0 +MPI Rank 1: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0956s; samplesPerSecond = 6692.0 +MPI Rank 1: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0957s; samplesPerSecond = 6689.2 +MPI Rank 1: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0957s; samplesPerSecond = 6690.2 +MPI Rank 1: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0957s; samplesPerSecond = 6686.8 +MPI Rank 1: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0957s; samplesPerSecond = 6686.3 +MPI Rank 1: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0957s; samplesPerSecond = 6685.6 +MPI Rank 1: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0957s; samplesPerSecond = 6687.5 +MPI Rank 1: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0957s; samplesPerSecond = 6687.6 +MPI Rank 1: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0957s; samplesPerSecond = 6687.7 +MPI Rank 1: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0957s; samplesPerSecond = 6689.9 +MPI Rank 1: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0957s; samplesPerSecond = 6690.3 +MPI Rank 1: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0957s; samplesPerSecond = 6688.5 +MPI Rank 1: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0957s; samplesPerSecond = 6689.5 +MPI Rank 1: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0959s; samplesPerSecond = 6674.7 +MPI Rank 1: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0957s; samplesPerSecond = 6688.5 +MPI Rank 1: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0958s; samplesPerSecond = 6682.1 +MPI Rank 1: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0957s; samplesPerSecond = 6688.3 +MPI Rank 1: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0957s; samplesPerSecond = 6684.8 +MPI Rank 1: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0957s; samplesPerSecond = 6686.9 +MPI Rank 1: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0957s; samplesPerSecond = 6687.8 +MPI Rank 1: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0957s; samplesPerSecond = 6686.2 +MPI Rank 1: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0956s; samplesPerSecond = 6692.6 +MPI Rank 1: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0957s; samplesPerSecond = 6690.4 +MPI Rank 1: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0956s; samplesPerSecond = 6692.7 +MPI Rank 1: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0956s; samplesPerSecond = 6692.2 +MPI Rank 1: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0957s; samplesPerSecond = 6686.8 +MPI Rank 1: 05/03/2016 18:02:44: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.0667s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:52: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 18:02:44: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:52: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.014332 -MPI Rank 1: Async gradient aggregation wait time: 0.002741 -MPI Rank 1: Actual gradient aggregation time: 0.014116 -MPI Rank 1: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10515053 * 2304; EvalErrorPrediction = 0.56770833 * 2304; time = 0.1502s; samplesPerSecond = 15339.5 -MPI Rank 1: Async gradient aggregation wait time: 0.000337 -MPI Rank 1: Actual gradient aggregation time: 0.012261 -MPI Rank 1: Async gradient aggregation wait time: 0.001592 -MPI Rank 1: Actual gradient aggregation time: 0.012403 -MPI Rank 1: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.07710528 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.1231s; samplesPerSecond = 20799.6 -MPI Rank 1: Async gradient aggregation wait time: 0.002547 -MPI Rank 1: Actual gradient aggregation time: 0.014091 -MPI Rank 1: Async gradient aggregation wait time: 0.001587 -MPI Rank 1: Actual gradient aggregation time: 0.01285 -MPI Rank 1: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06009947 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1324s; samplesPerSecond = 19340.9 -MPI Rank 1: Async gradient aggregation wait time: 0.001572 -MPI Rank 1: Actual gradient aggregation time: 0.012771 -MPI Rank 1: Async gradient aggregation wait time: 0.001618 -MPI Rank 1: Actual gradient aggregation time: 0.012789 -MPI Rank 1: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.09985912 * 2560; EvalErrorPrediction = 0.60898438 * 2560; time = 0.1297s; samplesPerSecond = 19739.4 -MPI Rank 1: Async gradient aggregation wait time: 0.001773 -MPI Rank 1: Actual gradient aggregation time: 0.012482 -MPI Rank 1: Async gradient aggregation wait time: 0.001603 -MPI Rank 1: Actual gradient aggregation time: 0.012802 -MPI Rank 1: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02133028 * 2560; EvalErrorPrediction = 0.56875000 * 2560; time = 0.1299s; samplesPerSecond = 19711.7 -MPI Rank 1: Async gradient aggregation wait time: 0.001583 -MPI Rank 1: Actual gradient aggregation time: 0.012806 -MPI Rank 1: Async gradient aggregation wait time: 0.001958 -MPI Rank 1: Actual gradient aggregation time: 0.012784 -MPI Rank 1: 05/03/2016 18:04:53: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.23836126 * 2560; EvalErrorPrediction = 0.59023437 * 2560; time = 0.1300s; samplesPerSecond = 19695.8 -MPI Rank 1: Async gradient aggregation wait time: 0.001859 -MPI Rank 1: Actual gradient aggregation time: 0.012294 -MPI Rank 1: Async gradient aggregation wait time: 0.001615 -MPI Rank 1: Actual gradient aggregation time: 0.012789 -MPI Rank 1: 05/03/2016 18:04:53: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.16973375 * 2560; EvalErrorPrediction = 0.58632812 * 2560; time = 0.1297s; samplesPerSecond = 19737.1 -MPI Rank 1: Async gradient aggregation wait time: 0.001602 -MPI Rank 1: Actual gradient aggregation time: 0.012786 -MPI Rank 1: Async gradient aggregation wait time: 0.001784 
-MPI Rank 1: Actual gradient aggregation time: 0.012423 -MPI Rank 1: 05/03/2016 18:04:53: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18375111 * 2560; EvalErrorPrediction = 0.60312500 * 2560; time = 0.1302s; samplesPerSecond = 19658.6 -MPI Rank 1: Async gradient aggregation wait time: 0.003195 -MPI Rank 1: Actual gradient aggregation time: 0.004004 -MPI Rank 1: 05/03/2016 18:04:53: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.12191186 * 20480; EvalErrorPrediction = 0.58330078 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.06463s +MPI Rank 1: 05/03/2016 18:02:44: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Actual gradient aggregation time: 0.014154 +MPI Rank 1: Async gradient aggregation wait time: 0.00111 +MPI Rank 1: Actual gradient aggregation time: 0.013655 +MPI Rank 1: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10515053 * 2304; EvalErrorPrediction = 0.56770833 * 2304; time = 0.1457s; samplesPerSecond = 15808.5 +MPI Rank 1: Async gradient aggregation wait time: 0.000607 +MPI Rank 1: Actual gradient aggregation time: 0.011482 +MPI Rank 1: Async gradient aggregation wait time: 0.001658 +MPI Rank 1: Actual gradient aggregation time: 0.011506 +MPI Rank 1: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.07710528 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.1229s; samplesPerSecond = 20823.8 +MPI Rank 1: Async gradient aggregation wait time: 0.001234 +MPI Rank 1: Actual gradient aggregation time: 0.011533 +MPI Rank 1: Async gradient aggregation wait time: 0.001577 +MPI Rank 1: Actual gradient aggregation time: 0.012617 +MPI Rank 1: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06009947 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1308s; samplesPerSecond = 19574.9 +MPI Rank 1: Async gradient aggregation wait time: 0.001421 +MPI Rank 1: Actual gradient aggregation time: 0.012586 +MPI Rank 1: Async gradient aggregation wait time: 0.001246 +MPI Rank 1: Actual gradient aggregation time: 0.011429 +MPI Rank 1: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.09985912 * 2560; EvalErrorPrediction = 0.60898438 * 2560; time = 0.1288s; samplesPerSecond = 19873.9 +MPI Rank 1: Async gradient aggregation wait time: 0.001534 +MPI Rank 1: Actual gradient aggregation time: 0.011477 +MPI Rank 1: Async gradient aggregation wait time: 0.001606 +MPI Rank 1: Actual gradient aggregation time: 0.012641 +MPI Rank 1: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02133028 * 2560; EvalErrorPrediction = 0.56875000 * 2560; time = 0.1293s; samplesPerSecond = 19794.8 +MPI Rank 1: Async gradient aggregation wait time: 0.001538 +MPI Rank 1: Actual gradient aggregation time: 0.011453 +MPI Rank 1: Async gradient aggregation wait time: 0.001267 +MPI Rank 1: Actual gradient aggregation time: 0.011501 +MPI Rank 1: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.23836126 * 2560; EvalErrorPrediction = 0.59023437 * 2560; time = 0.1290s; samplesPerSecond = 19851.9 +MPI Rank 1: Async gradient aggregation wait time: 0.001468 +MPI Rank 1: Actual gradient aggregation time: 0.012599 +MPI Rank 1: Async gradient aggregation 
wait time: 0.001786 +MPI Rank 1: Actual gradient aggregation time: 0.011429 +MPI Rank 1: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.16973375 * 2560; EvalErrorPrediction = 0.58632812 * 2560; time = 0.1285s; samplesPerSecond = 19928.7 +MPI Rank 1: Async gradient aggregation wait time: 0.001599 +MPI Rank 1: Actual gradient aggregation time: 0.012672 +MPI Rank 1: Async gradient aggregation wait time: 0.001766 +MPI Rank 1: Actual gradient aggregation time: 0.011353 +MPI Rank 1: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18375111 * 2560; EvalErrorPrediction = 0.60312500 * 2560; time = 0.1296s; samplesPerSecond = 19748.1 +MPI Rank 1: Async gradient aggregation wait time: 0.003233 +MPI Rank 1: Actual gradient aggregation time: 0.00402 +MPI Rank 1: 05/03/2016 18:02:45: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.12191186 * 20480; EvalErrorPrediction = 0.58330078 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.05395s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 18:02:45: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.000286 -MPI Rank 1: Actual gradient aggregation time: 0.025291 -MPI Rank 1: Async gradient aggregation wait time: 0.001598 -MPI Rank 1: Actual gradient aggregation time: 0.026832 -MPI Rank 1: 05/03/2016 18:04:53: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.36016346 * 9216; EvalErrorPrediction = 0.67154948 * 9216; time = 0.2728s; samplesPerSecond = 33781.4 -MPI Rank 1: Async gradient aggregation wait time: 0.005264 -MPI Rank 1: Actual gradient aggregation time: 0.025578 +MPI Rank 1: 05/03/2016 18:02:45: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 1: Async gradient aggregation wait time: 0.001551 +MPI Rank 1: Actual gradient aggregation time: 0.025746 +MPI Rank 1: Async gradient aggregation wait time: 0.001413 +MPI Rank 1: Actual gradient aggregation time: 0.026226 +MPI Rank 1: 05/03/2016 18:02:45: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.36016346 * 9216; EvalErrorPrediction = 0.67154948 * 9216; time = 0.2719s; samplesPerSecond = 33899.2 +MPI Rank 1: Async gradient aggregation wait time: 0.005204 +MPI Rank 1: Actual gradient aggregation time: 0.024941 MPI Rank 1: Async gradient aggregation wait time: 2e-06 -MPI Rank 1: Actual gradient aggregation time: 0.018457 -MPI Rank 1: 05/03/2016 18:04:53: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.16580543 * 10240; EvalErrorPrediction = 0.60068359 * 10240; time = 0.2699s; samplesPerSecond = 37946.0 -MPI Rank 1: 05/03/2016 18:04:53: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.24959638 * 20480; EvalErrorPrediction = 0.63022461 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.552289s +MPI Rank 1: Actual gradient aggregation time: 0.017206 +MPI Rank 1: 05/03/2016 18:02:46: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.16580543 * 10240; EvalErrorPrediction = 0.60068359 * 10240; time = 0.2733s; samplesPerSecond = 37472.2 +MPI Rank 1: 05/03/2016 18:02:46: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.24959638 * 20480; EvalErrorPrediction = 0.63022461 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.555317s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:53: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 18:02:46: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 1: Async gradient aggregation wait time: 0.002178 -MPI Rank 1: Actual gradient aggregation time: 0.026683 -MPI Rank 1: Async gradient aggregation wait time: 0.00432 -MPI Rank 1: Actual gradient aggregation time: 0.029614 -MPI Rank 1: 05/03/2016 18:04:54: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.00308410 * 9216; EvalErrorPrediction = 0.54079861 * 9216; time = 0.2665s; samplesPerSecond = 34584.5 -MPI Rank 1: Async gradient aggregation wait time: 0.001958 -MPI Rank 1: Actual gradient aggregation time: 0.026114 -MPI Rank 1: Async gradient aggregation wait time: 0.003203 -MPI Rank 1: Actual gradient aggregation time: 0.022848 -MPI Rank 1: 05/03/2016 18:04:54: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96679954 * 10240; EvalErrorPrediction = 0.54326172 * 10240; time = 0.2696s; samplesPerSecond = 37982.2 -MPI Rank 1: Async gradient aggregation wait time: 0.003719 -MPI Rank 1: 05/03/2016 18:04:54: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.98299829 * 20480; EvalErrorPrediction = 0.54199219 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=0.54659s -MPI Rank 1: 05/03/2016 18:04:54: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 18:02:46: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Async gradient aggregation wait time: 0.001585 +MPI Rank 1: Actual gradient aggregation time: 0.028107 +MPI Rank 1: Async gradient aggregation wait time: 0.003894 +MPI Rank 1: Actual gradient aggregation time: 0.02945 +MPI Rank 1: 05/03/2016 18:02:46: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.00308410 * 9216; EvalErrorPrediction = 0.54079861 * 9216; time = 0.2669s; samplesPerSecond = 34533.9 +MPI Rank 1: Async gradient aggregation wait time: 0.001534 +MPI Rank 1: Actual gradient aggregation time: 0.027296 +MPI Rank 1: Async gradient aggregation wait time: 0.002079 +MPI Rank 1: Actual gradient aggregation time: 0.025937 +MPI Rank 1: 05/03/2016 18:02:46: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96679954 * 10240; EvalErrorPrediction = 0.54326172 * 10240; time = 0.2686s; samplesPerSecond = 38126.6 +MPI Rank 1: Async gradient aggregation wait time: 0.003579 +MPI Rank 1: 05/03/2016 18:02:46: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.98299829 * 20480; EvalErrorPrediction = 0.54199219 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=0.545577s +MPI Rank 1: 05/03/2016 18:02:46: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:54: Action "train" complete. +MPI Rank 1: 05/03/2016 18:02:46: Action "train" complete. 
MPI Rank 1: -MPI Rank 1: 05/03/2016 18:04:54: __COMPLETED__ -MPI Rank 2: 05/03/2016 18:04:45: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 18:04:45: Build info: +MPI Rank 1: 05/03/2016 18:02:46: __COMPLETED__ +MPI Rank 2: 05/03/2016 18:02:38: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:02:38: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:45: Built time: May 3 2016 17:56:15 -MPI Rank 2: 05/03/2016 18:04:45: Last modified date: Tue May 3 11:36:23 2016 -MPI Rank 2: 05/03/2016 18:04:45: Build type: release -MPI Rank 2: 05/03/2016 18:04:45: Build target: GPU -MPI Rank 2: 05/03/2016 18:04:45: With 1bit-SGD: yes -MPI Rank 2: 05/03/2016 18:04:45: Math lib: acml -MPI Rank 2: 05/03/2016 18:04:45: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 2: 05/03/2016 18:04:45: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 2: 05/03/2016 18:04:45: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 2: 05/03/2016 18:04:45: Build Branch: HEAD -MPI Rank 2: 05/03/2016 18:04:45: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 2: 05/03/2016 18:04:45: Built by philly on 87698aadbc9d -MPI Rank 2: 05/03/2016 18:04:45: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 2: 05/03/2016 18:04:45: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:02:38: Built time: May 3 2016 17:56:15 +MPI Rank 2: 05/03/2016 18:02:38: Last modified date: Tue May 3 11:36:23 2016 +MPI Rank 2: 05/03/2016 18:02:38: Build type: release +MPI Rank 2: 05/03/2016 18:02:38: Build target: GPU +MPI Rank 2: 05/03/2016 18:02:38: With 1bit-SGD: yes +MPI Rank 2: 05/03/2016 18:02:38: Math lib: acml +MPI Rank 2: 05/03/2016 18:02:38: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 2: 05/03/2016 18:02:38: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 2: 05/03/2016 18:02:38: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 2: 05/03/2016 18:02:38: Build Branch: HEAD +MPI Rank 2: 05/03/2016 18:02:38: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 2: 05/03/2016 18:02:38: Built by philly on 87698aadbc9d +MPI Rank 2: 05/03/2016 18:02:38: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 2: 05/03/2016 18:02:38: ------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:45: Running on localhost at 2016/05/03 18:04:45 -MPI Rank 2: 05/03/2016 18:04:45: Command line: -MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double 
speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: 05/03/2016 18:02:38: Running on localhost at 2016/05/03 18:02:38 +MPI Rank 2: 05/03/2016 18:02:38: Command line: +MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:45: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:04:45: precision = "float" +MPI Rank 2: 05/03/2016 18:02:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:02:38: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1410,14 +1412,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -1425,18 +1425,18 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:45: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:02:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:45: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:04:45: precision = "float" +MPI Rank 2: 05/03/2016 18:02:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:02:38: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = 0 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1520,14 +1520,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -1535,24 +1533,24 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:45: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:02:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:45: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:02:38: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/../../../DNN +MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/.. 
MPI Rank 2: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:deviceId=0 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1634,35 +1632,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 18:04:45: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 18:04:45: Commands: speechTrain -MPI Rank 2: 05/03/2016 18:04:45: Precision = "double" -MPI Rank 2: 05/03/2016 18:04:45: Using 8 CPU threads. 
-MPI Rank 2: 05/03/2016 18:04:45: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/ExperimentalHtkmlfReader/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 18:04:45: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 2: 05/03/2016 18:04:45: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 2: 05/03/2016 18:02:38: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:02:38: Commands: speechTrain +MPI Rank 2: 05/03/2016 18:02:38: Precision = "double" +MPI Rank 2: 05/03/2016 18:02:38: Using 8 CPU threads. +MPI Rank 2: 05/03/2016 18:02:38: CNTKModelPath: /tmp/cntk-test-20160503180003.29154/Speech/DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 18:02:38: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 2: 05/03/2016 18:02:38: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:45: ############################################################################## -MPI Rank 2: 05/03/2016 18:04:45: # # -MPI Rank 2: 05/03/2016 18:04:45: # Action "train" # -MPI Rank 2: 05/03/2016 18:04:45: # # -MPI Rank 2: 05/03/2016 18:04:45: ############################################################################## +MPI Rank 2: 05/03/2016 18:02:38: ############################################################################## +MPI Rank 2: 05/03/2016 18:02:38: # # +MPI Rank 2: 05/03/2016 18:02:38: # Action "train" # +MPI Rank 2: 05/03/2016 18:02:38: # # +MPI Rank 2: 05/03/2016 18:02:38: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:45: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 18:02:38: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 2: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:45: Creating virgin network. +MPI Rank 2: 05/03/2016 18:02:38: Creating virgin network. MPI Rank 2: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 2: MPI Rank 2: Post-processing network... @@ -1715,14 +1712,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. 
MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:46: Created model with 25 nodes on GPU 0. +MPI Rank 2: 05/03/2016 18:02:38: Created model with 25 nodes on GPU 0. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:46: Training criterion node(s): -MPI Rank 2: 05/03/2016 18:04:46: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 18:02:38: Training criterion node(s): +MPI Rank 2: 05/03/2016 18:02:38: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:46: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 18:02:38: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:46: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 18:02:38: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1730,160 +1727,166 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0x17353a8: {[features Value[363 x *]] } -MPI Rank 2: 0x208ed48: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 0x208fa58: {[W0 Value[512 x 363]] } -MPI Rank 2: 0x2094438: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 0x25d9428: {[W1 Value[512 x 512]] } -MPI Rank 2: 0x25da1f8: {[B1 Value[512 x 1]] } -MPI Rank 2: 0x25db398: {[W2 Value[132 x 512]] } -MPI Rank 2: 0x25dc048: {[B2 Value[132 x 1]] } -MPI Rank 2: 0x25dce78: {[labels Value[132 x *]] } -MPI Rank 2: 0x25de0d8: {[Prior Value[132]] } -MPI Rank 2: 0x25e3978: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 0x25e3c78: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 0x25e3e38: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 0x25e42c8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 0x25e4438: {[LogOfPrior Value[132]] } -MPI Rank 2: 0x25e9a38: {[B0 Value[512 x 1]] } -MPI Rank 2: 0x28c5108: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 0x28c58c8: {[W0*features Value[512 x *]] } -MPI Rank 2: 0x28c5ad8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 0x28c5c38: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 0x28c5d98: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 0x28c5f58: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 0x28c6118: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 0x28c62d8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 0x28c6e38: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 0x28c6ff8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 0x28c71b8: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 0x28c7378: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 0x10e3b68: {[features Value[363 x *]] } +MPI Rank 2: 0x1d9b2a8: {[W0 Value[512 x 363]] } +MPI Rank 2: 0x1da93f8: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 0x1da9908: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 0x1e8c968: {[B0 Value[512 x 1]] } +MPI Rank 2: 0x1e8ead8: {[W1 Value[512 x 512]] } +MPI Rank 2: 0x2243098: {[B1 Value[512 x 1]] } +MPI Rank 2: 0x2244238: {[W2 Value[132 x 
512]] } +MPI Rank 2: 0x2244ee8: {[B2 Value[132 x 1]] } +MPI Rank 2: 0x2245d18: {[labels Value[132 x *]] } +MPI Rank 2: 0x2246f78: {[Prior Value[132]] } +MPI Rank 2: 0x224cb98: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 0x224ccf8: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 0x224ceb8: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 0x224d348: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 0x224d3f8: {[LogOfPrior Value[132]] } +MPI Rank 2: 0x224eb28: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 0x224f2e8: {[W0*features Value[512 x *]] } +MPI Rank 2: 0x224f5b8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 0x224f778: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 0x224f938: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 0x224faf8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 0x224fcb8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 0x224fe78: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 0x22509d8: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 0x2250b98: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 0x2250d58: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 0x2250f18: {[B2 Gradient[132 x 1]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:46: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 18:02:38: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:46: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 18:04:46: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 18:04:46: Prior = Mean() +MPI Rank 2: 05/03/2016 18:02:38: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 18:02:38: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 18:02:38: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:48: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 18:02:41: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:48: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 18:02:41: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:49: Starting minibatch loop. 
-MPI Rank 2: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.1927s; samplesPerSecond = 3321.5 -MPI Rank 2: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0956s; samplesPerSecond = 6697.5 -MPI Rank 2: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6689.0 -MPI Rank 2: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0955s; samplesPerSecond = 6698.4 -MPI Rank 2: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0955s; samplesPerSecond = 6704.2 -MPI Rank 2: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0955s; samplesPerSecond = 6702.1 -MPI Rank 2: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0955s; samplesPerSecond = 6704.1 -MPI Rank 2: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0955s; samplesPerSecond = 6701.4 -MPI Rank 2: 05/03/2016 18:04:49: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0955s; samplesPerSecond = 6702.4 -MPI Rank 2: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0954s; samplesPerSecond = 6706.8 -MPI Rank 2: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0956s; samplesPerSecond = 6696.1 -MPI Rank 2: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0955s; samplesPerSecond = 6698.3 -MPI Rank 2: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0955s; samplesPerSecond = 6704.4 -MPI Rank 2: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0955s; samplesPerSecond = 6701.4 -MPI Rank 2: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0955s; samplesPerSecond = 6702.2 -MPI Rank 2: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0954s; samplesPerSecond = 6706.1 -MPI Rank 2: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0956s; samplesPerSecond = 6697.6 -MPI Rank 2: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0955s; samplesPerSecond = 6701.4 -MPI Rank 2: 05/03/2016 18:04:50: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0954s; samplesPerSecond = 6707.0 -MPI Rank 2: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0954s; samplesPerSecond = 6710.7 -MPI Rank 2: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0955s; samplesPerSecond = 6698.3 -MPI Rank 2: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0955s; samplesPerSecond = 6701.4 -MPI Rank 2: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0956s; samplesPerSecond = 6695.0 -MPI Rank 2: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0956s; samplesPerSecond = 6693.8 -MPI Rank 2: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0956s; samplesPerSecond = 6692.4 -MPI Rank 2: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0957s; samplesPerSecond = 6689.9 -MPI Rank 2: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0957s; samplesPerSecond = 6686.2 -MPI Rank 2: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0956s; samplesPerSecond = 6692.5 -MPI Rank 2: 05/03/2016 18:04:51: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0956s; samplesPerSecond = 6697.2 -MPI Rank 2: 05/03/2016 18:04:52: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0953s; samplesPerSecond = 6715.7 -MPI Rank 2: 05/03/2016 18:04:52: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0954s; samplesPerSecond = 6710.8 -MPI Rank 2: 05/03/2016 18:04:52: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0777s; samplesPerSecond = 8235.5 -MPI Rank 2: 05/03/2016 18:04:52: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.18337s +MPI Rank 2: 05/03/2016 18:02:41: Starting minibatch loop. 
+MPI Rank 2: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.0956s; samplesPerSecond = 6693.2 +MPI Rank 2: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0958s; samplesPerSecond = 6684.0 +MPI Rank 2: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0958s; samplesPerSecond = 6684.0 +MPI Rank 2: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0958s; samplesPerSecond = 6683.4 +MPI Rank 2: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0957s; samplesPerSecond = 6687.6 +MPI Rank 2: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0956s; samplesPerSecond = 6692.7 +MPI Rank 2: 05/03/2016 18:02:41: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0956s; samplesPerSecond = 6691.7 +MPI Rank 2: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0957s; samplesPerSecond = 6689.2 +MPI Rank 2: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0957s; samplesPerSecond = 6690.9 +MPI Rank 2: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0957s; samplesPerSecond = 6686.6 +MPI Rank 2: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0957s; samplesPerSecond = 6687.3 +MPI Rank 2: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0957s; samplesPerSecond = 6685.9 +MPI Rank 2: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0957s; samplesPerSecond = 6687.4 +MPI Rank 2: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0957s; samplesPerSecond = 6687.8 +MPI Rank 2: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0957s; samplesPerSecond = 6687.1 +MPI Rank 2: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0957s; samplesPerSecond = 6690.5 +MPI Rank 2: 05/03/2016 18:02:42: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0957s; samplesPerSecond = 6690.6 +MPI Rank 2: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0957s; samplesPerSecond = 6688.5 +MPI Rank 2: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0957s; samplesPerSecond = 6690.3 +MPI Rank 2: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0959s; samplesPerSecond = 6674.9 +MPI Rank 2: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0957s; samplesPerSecond = 6688.1 +MPI Rank 2: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0958s; samplesPerSecond = 6682.3 +MPI Rank 2: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0957s; samplesPerSecond = 6688.1 +MPI Rank 2: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0957s; samplesPerSecond = 6684.9 +MPI Rank 2: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0957s; samplesPerSecond = 6686.7 +MPI Rank 2: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0957s; samplesPerSecond = 6688.2 +MPI Rank 2: 05/03/2016 18:02:43: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0957s; samplesPerSecond = 6685.5 +MPI Rank 2: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0956s; samplesPerSecond = 6692.4 +MPI Rank 2: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0957s; samplesPerSecond = 6689.9 +MPI Rank 2: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0956s; samplesPerSecond = 6692.8 +MPI Rank 2: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0956s; samplesPerSecond = 6691.6 +MPI Rank 2: 05/03/2016 18:02:44: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0957s; samplesPerSecond = 6686.7 +MPI Rank 2: 05/03/2016 18:02:44: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.06566s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:52: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 18:02:44: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:52: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.019882 -MPI Rank 2: Async gradient aggregation wait time: 2e-06 -MPI Rank 2: Actual gradient aggregation time: 0.013736 -MPI Rank 2: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10515053 * 2304; EvalErrorPrediction = 0.56770833 * 2304; time = 0.1409s; samplesPerSecond = 16353.6 -MPI Rank 2: Async gradient aggregation wait time: 0.00522 -MPI Rank 2: Actual gradient aggregation time: 0.012787 -MPI Rank 2: Async gradient aggregation wait time: 0.00566 -MPI Rank 2: Actual gradient aggregation time: 0.012288 -MPI Rank 2: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.07710528 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.1326s; samplesPerSecond = 19306.5 -MPI Rank 2: Async gradient aggregation wait time: 0.003918 -MPI Rank 2: Actual gradient aggregation time: 0.013279 -MPI Rank 2: Async gradient aggregation wait time: 0.002509 -MPI Rank 2: Actual gradient aggregation time: 0.012409 -MPI Rank 2: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06009947 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1323s; samplesPerSecond = 19343.5 -MPI Rank 2: Async gradient aggregation wait time: 0.002505 -MPI Rank 2: Actual gradient aggregation time: 0.012339 -MPI Rank 2: Async gradient aggregation wait time: 0.002736 -MPI Rank 2: Actual gradient aggregation time: 0.012343 -MPI Rank 2: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.09985912 * 2560; EvalErrorPrediction = 0.60898438 * 2560; time = 0.1297s; samplesPerSecond = 19736.5 -MPI Rank 2: Async gradient aggregation wait time: 0.003377 -MPI Rank 2: Actual gradient aggregation time: 0.01203 -MPI Rank 2: Async gradient aggregation wait time: 0.001736 -MPI Rank 2: Actual gradient aggregation time: 0.012368 -MPI Rank 2: 05/03/2016 18:04:52: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02133028 * 2560; EvalErrorPrediction = 0.56875000 * 2560; time = 0.1299s; samplesPerSecond = 19712.5 -MPI Rank 2: Async gradient aggregation wait time: 0.002142 -MPI Rank 2: Actual gradient aggregation time: 0.01236 -MPI Rank 2: Async gradient aggregation wait time: 0.001788 -MPI Rank 2: Actual gradient aggregation time: 0.012345 -MPI Rank 2: 05/03/2016 18:04:53: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.23836126 * 2560; EvalErrorPrediction = 0.59023437 * 2560; time = 0.1299s; samplesPerSecond = 19707.3 -MPI Rank 2: Async gradient aggregation wait time: 0.001599 -MPI Rank 2: Actual gradient aggregation time: 0.012482 -MPI Rank 2: Async gradient aggregation wait time: 0.005851 -MPI Rank 2: Actual gradient aggregation time: 0.012334 -MPI Rank 2: 05/03/2016 18:04:53: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.16973375 * 2560; EvalErrorPrediction = 0.58632812 * 2560; time = 0.1297s; samplesPerSecond = 19737.4 -MPI Rank 2: Async gradient aggregation wait time: 0.002171 -MPI Rank 2: Actual gradient aggregation time: 0.012325 -MPI Rank 2: Async gradient aggregation wait time: 0.003008 -MPI Rank 
2: Actual gradient aggregation time: 0.011985 -MPI Rank 2: 05/03/2016 18:04:53: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18375111 * 2560; EvalErrorPrediction = 0.60312500 * 2560; time = 0.1302s; samplesPerSecond = 19657.2 -MPI Rank 2: Async gradient aggregation wait time: 0.003183 -MPI Rank 2: Actual gradient aggregation time: 0.00311 -MPI Rank 2: 05/03/2016 18:04:53: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.12191186 * 20480; EvalErrorPrediction = 0.58330078 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.06448s +MPI Rank 2: 05/03/2016 18:02:44: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Actual gradient aggregation time: 0.019985 +MPI Rank 2: Async gradient aggregation wait time: 4e-06 +MPI Rank 2: Actual gradient aggregation time: 0.008816 +MPI Rank 2: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10515053 * 2304; EvalErrorPrediction = 0.56770833 * 2304; time = 0.1360s; samplesPerSecond = 16944.9 +MPI Rank 2: Async gradient aggregation wait time: 0.007386 +MPI Rank 2: Actual gradient aggregation time: 0.011345 +MPI Rank 2: Async gradient aggregation wait time: 0.00181 +MPI Rank 2: Actual gradient aggregation time: 0.011047 +MPI Rank 2: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.07710528 * 2560; EvalErrorPrediction = 0.56914062 * 2560; time = 0.1324s; samplesPerSecond = 19341.8 +MPI Rank 2: Async gradient aggregation wait time: 0.004569 +MPI Rank 2: Actual gradient aggregation time: 0.010692 +MPI Rank 2: Async gradient aggregation wait time: 0.002158 +MPI Rank 2: Actual gradient aggregation time: 0.011467 +MPI Rank 2: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06009947 * 2560; EvalErrorPrediction = 0.56367188 * 2560; time = 0.1312s; samplesPerSecond = 19517.6 +MPI Rank 2: Async gradient aggregation wait time: 0.002541 +MPI Rank 2: Actual gradient aggregation time: 0.01146 +MPI Rank 2: Async gradient aggregation wait time: 0.004612 +MPI Rank 2: Actual gradient aggregation time: 0.011296 +MPI Rank 2: 05/03/2016 18:02:44: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.09985912 * 2560; EvalErrorPrediction = 0.60898438 * 2560; time = 0.1288s; samplesPerSecond = 19873.9 +MPI Rank 2: Async gradient aggregation wait time: 0.002927 +MPI Rank 2: Actual gradient aggregation time: 0.011017 +MPI Rank 2: Async gradient aggregation wait time: 0.002039 +MPI Rank 2: Actual gradient aggregation time: 0.011454 +MPI Rank 2: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.02133028 * 2560; EvalErrorPrediction = 0.56875000 * 2560; time = 0.1293s; samplesPerSecond = 19799.1 +MPI Rank 2: Async gradient aggregation wait time: 0.002411 +MPI Rank 2: Actual gradient aggregation time: 0.010996 +MPI Rank 2: Async gradient aggregation wait time: 0.004359 +MPI Rank 2: Actual gradient aggregation time: 0.011064 +MPI Rank 2: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.23836126 * 2560; EvalErrorPrediction = 0.59023437 * 2560; time = 0.1290s; samplesPerSecond = 19850.8 +MPI Rank 2: Async gradient aggregation wait time: 0.001575 +MPI Rank 2: Actual gradient aggregation time: 0.011457 +MPI Rank 2: Async gradient aggregation wait time: 
0.003345 +MPI Rank 2: Actual gradient aggregation time: 0.011232 +MPI Rank 2: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.16973375 * 2560; EvalErrorPrediction = 0.58632812 * 2560; time = 0.1285s; samplesPerSecond = 19928.7 +MPI Rank 2: Async gradient aggregation wait time: 0.002175 +MPI Rank 2: Actual gradient aggregation time: 0.011439 +MPI Rank 2: Async gradient aggregation wait time: 0.003006 +MPI Rank 2: Actual gradient aggregation time: 0.011252 +MPI Rank 2: 05/03/2016 18:02:45: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18375111 * 2560; EvalErrorPrediction = 0.60312500 * 2560; time = 0.1296s; samplesPerSecond = 19750.3 +MPI Rank 2: Async gradient aggregation wait time: 0.003258 +MPI Rank 2: Actual gradient aggregation time: 0.003114 +MPI Rank 2: 05/03/2016 18:02:45: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.12191186 * 20480; EvalErrorPrediction = 0.58330078 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.0537s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 18:02:45: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.007281 -MPI Rank 2: Actual gradient aggregation time: 0.024853 -MPI Rank 2: Async gradient aggregation wait time: 0.007428 -MPI Rank 2: Actual gradient aggregation time: 0.026385 -MPI Rank 2: 05/03/2016 18:04:53: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.36016346 * 9216; EvalErrorPrediction = 0.67154948 * 9216; time = 0.2725s; samplesPerSecond = 33817.0 -MPI Rank 2: Async gradient aggregation wait time: 0.00115 -MPI Rank 2: Actual gradient aggregation time: 0.025132 -MPI Rank 2: Async gradient aggregation wait time: 0.006949 -MPI Rank 2: Actual gradient aggregation time: 0.03234 -MPI Rank 2: 05/03/2016 18:04:53: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.16580543 * 10240; EvalErrorPrediction = 0.60068359 * 10240; time = 0.2699s; samplesPerSecond = 37946.7 -MPI Rank 2: 05/03/2016 18:04:53: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.24959638 * 20480; EvalErrorPrediction = 0.63022461 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.552071s +MPI Rank 2: 05/03/2016 18:02:45: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 2: Async gradient aggregation wait time: 0.014768 +MPI Rank 2: Actual gradient aggregation time: 0.025282 +MPI Rank 2: Async gradient aggregation wait time: 0.004106 +MPI Rank 2: Actual gradient aggregation time: 0.025755 +MPI Rank 2: 05/03/2016 18:02:45: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.36016346 * 9216; EvalErrorPrediction = 0.67154948 * 9216; time = 0.2725s; samplesPerSecond = 33823.0 +MPI Rank 2: Async gradient aggregation wait time: 0.001015 +MPI Rank 2: Actual gradient aggregation time: 0.026207 +MPI Rank 2: Async gradient aggregation wait time: 0.003997 +MPI Rank 2: Actual gradient aggregation time: 0.036631 +MPI Rank 2: 05/03/2016 18:02:46: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.16580543 * 10240; EvalErrorPrediction = 0.60068359 * 10240; time = 0.2730s; samplesPerSecond = 37506.3 +MPI Rank 2: 05/03/2016 18:02:46: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.24959638 * 20480; EvalErrorPrediction = 0.63022461 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.555048s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:53: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 18:02:46: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.011737 -MPI Rank 2: Actual gradient aggregation time: 0.027331 -MPI Rank 2: Async gradient aggregation wait time: 2e-06 -MPI Rank 2: Actual gradient aggregation time: 0.016371 -MPI Rank 2: 05/03/2016 18:04:54: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.00308410 * 9216; EvalErrorPrediction = 0.54079861 * 9216; time = 0.2660s; samplesPerSecond = 34647.4 -MPI Rank 2: Async gradient aggregation wait time: 0.005399 -MPI Rank 2: Actual gradient aggregation time: 0.026685 -MPI Rank 2: Async gradient aggregation wait time: 2e-06 -MPI Rank 2: Actual gradient aggregation time: 0.022064 -MPI Rank 2: 05/03/2016 18:04:54: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96679954 * 10240; EvalErrorPrediction = 0.54326172 * 10240; time = 0.2696s; samplesPerSecond = 37988.1 -MPI Rank 2: Async gradient aggregation wait time: 0.00333 -MPI Rank 2: 05/03/2016 18:04:54: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.98299829 * 20480; EvalErrorPrediction = 0.54199219 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=0.546385s -MPI Rank 2: 05/03/2016 18:04:54: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 18:02:46: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 2: Async gradient aggregation wait time: 0.015405 +MPI Rank 2: Actual gradient aggregation time: 0.02769 +MPI Rank 2: Async gradient aggregation wait time: 3e-06 +MPI Rank 2: Actual gradient aggregation time: 0.008794 +MPI Rank 2: 05/03/2016 18:02:46: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.00308410 * 9216; EvalErrorPrediction = 0.54079861 * 9216; time = 0.2668s; samplesPerSecond = 34539.4 +MPI Rank 2: Async gradient aggregation wait time: 0.004168 +MPI Rank 2: Actual gradient aggregation time: 0.026879 +MPI Rank 2: Async gradient aggregation wait time: 3e-06 +MPI Rank 2: Actual gradient aggregation time: 0.019143 +MPI Rank 2: 05/03/2016 18:02:46: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96679954 * 10240; EvalErrorPrediction = 0.54326172 * 10240; time = 0.2686s; samplesPerSecond = 38125.3 +MPI Rank 2: Async gradient aggregation wait time: 0.003611 +MPI Rank 2: 05/03/2016 18:02:46: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.98299829 * 20480; EvalErrorPrediction = 0.54199219 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=0.545322s +MPI Rank 2: 05/03/2016 18:02:46: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:54: Action "train" complete. +MPI Rank 2: 05/03/2016 18:02:46: Action "train" complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:04:54: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 18:02:46: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt index e5044a39e..233d04d53 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt @@ -1,4 +1,4 @@ -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr 
+=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr ------------------------------------------------------------------- Build info: @@ -59,8 +59,8 @@ ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded +ping [requestnodes (after change)]: 3 nodes pinging each other requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other @@ -91,32 +91,32 @@ job aborted: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe ended prematurely and may have crashed. 
exit code 0 ---- error analysis ----- -MPI Rank 0: 05/03/2016 15:52:37: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank0 -MPI Rank 0: 05/03/2016 15:52:37: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 15:52:37: Build info: +MPI Rank 0: 05/03/2016 14:47:28: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank0 +MPI Rank 0: 05/03/2016 14:47:28: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:47:28: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:37: Built time: May 3 2016 13:15:46 -MPI Rank 0: 05/03/2016 15:52:37: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 0: 05/03/2016 15:52:37: Build type: Release -MPI Rank 0: 05/03/2016 15:52:37: Build target: GPU -MPI Rank 0: 05/03/2016 15:52:37: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 15:52:37: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 05/03/2016 15:52:37: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 0: 05/03/2016 15:52:37: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 05/03/2016 15:52:37: Build Branch: HEAD -MPI Rank 0: 05/03/2016 15:52:37: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 0: 05/03/2016 15:52:37: Built by svcphil on cntk-muc01 -MPI Rank 0: 05/03/2016 15:52:37: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 05/03/2016 15:52:37: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:47:28: Built time: May 3 2016 13:15:46 +MPI Rank 0: 05/03/2016 14:47:28: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 0: 05/03/2016 14:47:28: Build type: Release +MPI Rank 0: 05/03/2016 14:47:28: Build target: GPU +MPI Rank 0: 05/03/2016 14:47:28: With 1bit-SGD: no +MPI Rank 0: 05/03/2016 14:47:28: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 0: 05/03/2016 14:47:28: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 0: 05/03/2016 14:47:28: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 0: 05/03/2016 14:47:28: Build Branch: HEAD +MPI Rank 0: 05/03/2016 14:47:28: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 0: 05/03/2016 14:47:28: Built by svcphil on cntk-muc01 +MPI Rank 0: 05/03/2016 14:47:28: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 0: 05/03/2016 14:47:28: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:37: Running on cntk-muc01 at 2016/05/03 15:52:37 -MPI Rank 0: 05/03/2016 15:52:37: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: 05/03/2016 14:47:28: Running on cntk-muc01 at 2016/05/03 14:47:28 +MPI Rank 0: 05/03/2016 14:47:28: Command line: +MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 15:52:37: precision = "float" +MPI Rank 0: 05/03/2016 14:47:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:47:28: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -206,14 +206,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -221,18 +219,18 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:47:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 15:52:37: precision = "float" +MPI Rank 0: 05/03/2016 14:47:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:47:28: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = -1 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -316,14 +314,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double 
MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -331,24 +327,24 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:47:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:37: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:47:28: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain MPI Rank 0: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:deviceId=-1 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -430,35 +426,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] 
[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 15:52:37: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 15:52:37: Commands: speechTrain -MPI Rank 0: 05/03/2016 15:52:37: Precision = "double" -MPI Rank 0: 05/03/2016 15:52:37: Using 2 CPU threads. -MPI Rank 0: 05/03/2016 15:52:37: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 15:52:37: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 0: 05/03/2016 15:52:37: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 0: 05/03/2016 14:47:28: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:47:28: Commands: speechTrain +MPI Rank 0: 05/03/2016 14:47:28: Precision = "double" +MPI Rank 0: 05/03/2016 14:47:28: Using 2 CPU threads. +MPI Rank 0: 05/03/2016 14:47:28: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 14:47:28: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 0: 05/03/2016 14:47:28: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:37: ############################################################################## -MPI Rank 0: 05/03/2016 15:52:37: # # -MPI Rank 0: 05/03/2016 15:52:37: # Action "train" # -MPI Rank 0: 05/03/2016 15:52:37: # # -MPI Rank 0: 05/03/2016 15:52:37: ############################################################################## +MPI Rank 0: 05/03/2016 14:47:28: ############################################################################## +MPI Rank 0: 05/03/2016 14:47:28: # # +MPI Rank 0: 05/03/2016 14:47:28: # Action "train" # +MPI Rank 0: 05/03/2016 14:47:28: # # +MPI Rank 0: 05/03/2016 14:47:28: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:37: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 14:47:28: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 
948 entries MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:38: Creating virgin network. +MPI Rank 0: 05/03/2016 14:47:28: Creating virgin network. MPI Rank 0: MPI Rank 0: Post-processing network... MPI Rank 0: @@ -510,14 +505,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:38: Created model with 25 nodes on CPU. +MPI Rank 0: 05/03/2016 14:47:28: Created model with 25 nodes on CPU. MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:38: Training criterion node(s): -MPI Rank 0: 05/03/2016 15:52:38: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 14:47:28: Training criterion node(s): +MPI Rank 0: 05/03/2016 14:47:28: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:38: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 14:47:28: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:38: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 14:47:28: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. 
@@ -525,193 +520,199 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 00000060DCCD0A50: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 00000060DCCD0D70: {[features Value[363 x *]] } -MPI Rank 0: 00000060DCCD0EB0: {[W0 Value[512 x 363]] } -MPI Rank 0: 00000060DCCD0F50: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 00000060DCCD5970: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 00000060DCCD5A10: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 00000060DCCD5BF0: {[LogOfPrior Value[132]] } -MPI Rank 0: 00000060DCCD5DD0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 00000060DCCD5E70: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 00000060DCCD6050: {[W1 Value[512 x 512]] } -MPI Rank 0: 00000060DCCD65F0: {[B0 Value[512 x 1]] } -MPI Rank 0: 00000060DCCD6730: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 00000060DCCD67D0: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 00000060DCCD6910: {[W2 Value[132 x 512]] } -MPI Rank 0: 00000060DCCD69B0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 00000060DCCD6A50: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 00000060DCCD6AF0: {[W0*features Value[512 x *]] } -MPI Rank 0: 00000060DCCD6B90: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 00000060DCCD6E10: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 00000060DCCD6EB0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 00000060DCCD71D0: {[labels Value[132 x *]] } -MPI Rank 0: 00000060DCCD7270: {[Prior Value[132]] } -MPI Rank 0: 00000060DCCD7590: {[B1 Value[512 x 1]] } -MPI Rank 0: 00000060DCCD7630: {[B2 Value[132 x 1]] } -MPI Rank 0: 00000060E5CB2E40: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 00000060E5CB37A0: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 00000060E5CB3D40: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 00000060E5CB3F20: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 0000002C612A0230: {[W1 Value[512 x 512]] } +MPI Rank 0: 0000002C612A0550: {[features Value[363 x *]] } +MPI Rank 0: 0000002C612A05F0: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 0000002C612A0B90: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 0000002C612A0C30: {[W0 Value[512 x 363]] } +MPI Rank 0: 0000002C612A0E10: {[B0 Value[512 x 1]] } +MPI Rank 0: 0000002C612CEFC0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 0000002C612CF100: {[B1 Value[512 x 1]] } +MPI Rank 0: 0000002C612CF2E0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 0000002C612CF380: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 0000002C612CF4C0: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 0000002C612CF880: {[B2 Value[132 x 1]] } +MPI Rank 0: 0000002C612CFC40: {[W0*features Value[512 x *]] } +MPI Rank 0: 0000002C612CFD80: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 0000002C612CFE20: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 
0000002C612CFEC0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 0000002C612D0000: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 0000002C612D00A0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 0000002C612D01E0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 0000002C612D0280: {[B2 Gradient[132 x 1]] } +MPI Rank 0: 0000002C612D03C0: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 0000002C612D06E0: {[Prior Value[132]] } +MPI Rank 0: 0000002C612D0820: {[labels Value[132 x *]] } +MPI Rank 0: 0000002C612D08C0: {[LogOfPrior Value[132]] } +MPI Rank 0: 0000002C612D0A00: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 0000002C612D0AA0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 0000002C612D0B40: {[W2 Value[132 x 512]] } +MPI Rank 0: 0000002C612D0E60: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:38: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 14:47:28: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:38: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 15:52:38: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 15:52:38: Prior = Mean() +MPI Rank 0: 05/03/2016 14:47:28: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 14:47:28: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 14:47:28: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:39: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 14:47:30: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:40: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 14:47:31: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:52:40: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 15:52:41: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.8722s; samplesPerSecond = 733.8 -MPI Rank 0: 05/03/2016 15:52:42: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.7173s; samplesPerSecond = 892.2 -MPI Rank 0: 05/03/2016 15:52:43: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.7063s; samplesPerSecond = 906.2 -MPI Rank 0: 05/03/2016 15:52:43: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.7242s; samplesPerSecond = 883.8 -MPI Rank 0: 05/03/2016 15:52:44: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.7122s; samplesPerSecond = 898.6 -MPI Rank 0: 05/03/2016 15:52:45: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.7029s; samplesPerSecond = 910.5 -MPI Rank 0: 05/03/2016 15:52:45: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.7058s; samplesPerSecond = 906.8 -MPI Rank 0: 05/03/2016 15:52:46: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.7359s; samplesPerSecond = 869.7 -MPI Rank 0: 05/03/2016 15:52:47: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.7079s; samplesPerSecond = 904.0 -MPI Rank 0: 05/03/2016 15:52:48: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.6802s; samplesPerSecond = 940.9 -MPI Rank 0: 05/03/2016 15:52:48: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.7261s; samplesPerSecond = 881.5 -MPI Rank 0: 05/03/2016 15:52:49: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.7176s; samplesPerSecond = 891.8 -MPI Rank 0: 05/03/2016 15:52:50: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.7069s; samplesPerSecond = 905.3 -MPI Rank 0: 05/03/2016 15:52:50: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.7078s; samplesPerSecond = 904.2 -MPI Rank 0: 05/03/2016 15:52:51: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.7221s; samplesPerSecond = 886.4 -MPI Rank 0: 05/03/2016 15:52:52: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.7274s; samplesPerSecond = 879.9 -MPI Rank 0: 05/03/2016 15:52:53: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.7630s; samplesPerSecond = 838.8 -MPI Rank 0: 05/03/2016 15:52:53: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.7057s; samplesPerSecond = 906.9 -MPI Rank 0: 05/03/2016 15:52:54: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7290s; samplesPerSecond = 877.9 -MPI Rank 0: 05/03/2016 15:52:55: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.7155s; samplesPerSecond = 894.5 -MPI Rank 0: 05/03/2016 15:52:55: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.7210s; samplesPerSecond = 887.6 -MPI Rank 0: 05/03/2016 15:52:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7459s; samplesPerSecond = 858.1 -MPI Rank 0: 05/03/2016 15:52:57: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.7901s; samplesPerSecond = 810.0 -MPI Rank 0: 05/03/2016 15:52:58: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.8867s; samplesPerSecond = 721.7 -MPI Rank 0: 05/03/2016 15:52:59: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.7242s; samplesPerSecond = 883.7 -MPI Rank 0: 05/03/2016 15:52:59: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.7255s; samplesPerSecond = 882.1 -MPI Rank 0: 05/03/2016 15:53:00: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.7219s; samplesPerSecond = 886.5 -MPI Rank 0: 05/03/2016 15:53:01: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.7672s; samplesPerSecond = 834.2 -MPI Rank 0: 05/03/2016 15:53:02: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.7475s; samplesPerSecond = 856.2 -MPI Rank 0: 05/03/2016 15:53:02: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.7838s; samplesPerSecond = 816.5 -MPI Rank 0: 05/03/2016 15:53:03: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.3259s; samplesPerSecond = 1964.1 -MPI Rank 0: 05/03/2016 15:53:03: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.2327s; samplesPerSecond = 2750.6 -MPI Rank 0: 05/03/2016 15:53:03: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=22.7468s -MPI Rank 0: 05/03/2016 15:53:03: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 14:47:31: Starting minibatch loop. 
+MPI Rank 0: 05/03/2016 14:47:32: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.7558s; samplesPerSecond = 846.8 +MPI Rank 0: 05/03/2016 14:47:33: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.7102s; samplesPerSecond = 901.2 +MPI Rank 0: 05/03/2016 14:47:33: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6823s; samplesPerSecond = 937.9 +MPI Rank 0: 05/03/2016 14:47:34: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.7059s; samplesPerSecond = 906.6 +MPI Rank 0: 05/03/2016 14:47:35: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.7087s; samplesPerSecond = 903.0 +MPI Rank 0: 05/03/2016 14:47:35: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.7240s; samplesPerSecond = 883.9 +MPI Rank 0: 05/03/2016 14:47:36: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.7270s; samplesPerSecond = 880.3 +MPI Rank 0: 05/03/2016 14:47:37: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.7164s; samplesPerSecond = 893.3 +MPI Rank 0: 05/03/2016 14:47:38: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.7305s; samplesPerSecond = 876.1 +MPI Rank 0: 05/03/2016 14:47:38: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.7310s; samplesPerSecond = 875.6 +MPI Rank 0: 05/03/2016 14:47:39: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.7178s; samplesPerSecond = 891.6 +MPI Rank 0: 05/03/2016 14:47:40: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.7132s; samplesPerSecond = 897.4 +MPI Rank 0: 05/03/2016 14:47:40: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.7099s; samplesPerSecond = 901.5 +MPI Rank 0: 05/03/2016 14:47:41: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.7249s; samplesPerSecond = 882.9 +MPI Rank 0: 05/03/2016 14:47:42: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.7100s; samplesPerSecond = 901.4 +MPI Rank 0: 05/03/2016 14:47:43: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.7412s; samplesPerSecond = 863.5 +MPI Rank 0: 05/03/2016 14:47:43: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.7092s; samplesPerSecond = 902.4 +MPI Rank 0: 05/03/2016 14:47:44: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.7114s; samplesPerSecond = 899.6 +MPI Rank 0: 05/03/2016 14:47:45: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7236s; samplesPerSecond = 884.5 +MPI Rank 0: 05/03/2016 14:47:45: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.7129s; samplesPerSecond = 897.7 +MPI Rank 0: 05/03/2016 14:47:46: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.7222s; samplesPerSecond = 886.2 +MPI Rank 0: 05/03/2016 14:47:47: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7088s; samplesPerSecond = 902.9 +MPI Rank 0: 05/03/2016 14:47:48: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.7114s; samplesPerSecond = 899.6 +MPI Rank 0: 05/03/2016 14:47:48: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.7256s; samplesPerSecond = 882.1 +MPI Rank 0: 05/03/2016 14:47:49: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.7230s; samplesPerSecond = 885.2 +MPI Rank 0: 05/03/2016 14:47:50: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.7264s; samplesPerSecond = 881.1 +MPI Rank 0: 05/03/2016 14:47:50: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.7029s; samplesPerSecond = 910.5 +MPI Rank 0: 05/03/2016 14:47:51: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.7308s; samplesPerSecond = 875.8 +MPI Rank 0: 05/03/2016 14:47:52: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.7047s; samplesPerSecond = 908.2 +MPI Rank 0: 05/03/2016 14:47:53: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.7068s; samplesPerSecond = 905.5 +MPI Rank 0: 05/03/2016 14:47:53: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6568s; samplesPerSecond = 974.4 +MPI Rank 0: 05/03/2016 14:47:54: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.4458s; samplesPerSecond = 1435.5 +MPI Rank 0: 05/03/2016 14:47:54: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=22.6474s +MPI Rank 0: 05/03/2016 14:47:54: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:03: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum 
as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 14:47:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:03: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.019357 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019655 -MPI Rank 0: 05/03/2016 15:53:04: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14667310 * 2304; EvalErrorPrediction = 0.57595486 * 2304; time = 0.9403s; samplesPerSecond = 2450.3 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019944 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019915 -MPI Rank 0: 05/03/2016 15:53:05: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.09196197 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.9081s; samplesPerSecond = 2818.9 +MPI Rank 0: 05/03/2016 14:47:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Actual gradient aggregation time: 0.057573 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019845 +MPI Rank 0: Actual gradient aggregation time: 0.019018 +MPI Rank 0: 05/03/2016 14:47:55: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14667310 * 2304; EvalErrorPrediction = 0.57595486 * 2304; time = 0.9796s; samplesPerSecond = 2351.9 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019847 -MPI Rank 0: 05/03/2016 15:53:06: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07059549 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.9162s; samplesPerSecond = 2794.2 +MPI Rank 0: Actual gradient aggregation time: 0.018986 MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019871 +MPI Rank 0: Actual gradient aggregation time: 0.020523 +MPI Rank 0: 05/03/2016 14:47:56: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.09196197 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.8973s; samplesPerSecond = 2853.0 MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019919 -MPI Rank 0: 05/03/2016 15:53:07: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.03657413 * 2560; EvalErrorPrediction = 0.56289062 * 2560; time = 0.9171s; samplesPerSecond = 2791.4 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.020113 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019855 -MPI Rank 0: 05/03/2016 15:53:08: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97277932 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.8685s; samplesPerSecond = 2947.7 +MPI Rank 0: Actual gradient aggregation time: 0.019119 MPI 
Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019882 +MPI Rank 0: Actual gradient aggregation time: 0.019293 +MPI Rank 0: 05/03/2016 14:47:57: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07059549 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.9665s; samplesPerSecond = 2648.7 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.046585 -MPI Rank 0: 05/03/2016 15:53:09: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07885124 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.9330s; samplesPerSecond = 2743.9 +MPI Rank 0: Actual gradient aggregation time: 0.019727 MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019487 +MPI Rank 0: Actual gradient aggregation time: 0.01923 +MPI Rank 0: 05/03/2016 14:47:58: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.03657413 * 2560; EvalErrorPrediction = 0.56289062 * 2560; time = 0.8884s; samplesPerSecond = 2881.6 MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019864 -MPI Rank 0: 05/03/2016 15:53:09: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02991602 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.8982s; samplesPerSecond = 2850.3 +MPI Rank 0: Actual gradient aggregation time: 0.019375 +MPI Rank 0: Async gradient aggregation wait time: 1e-006 +MPI Rank 0: Actual gradient aggregation time: 0.019126 +MPI Rank 0: 05/03/2016 14:47:59: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97277932 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.9359s; samplesPerSecond = 2735.4 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019368 +MPI Rank 0: Actual gradient aggregation time: 0.019174 MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019827 -MPI Rank 0: 05/03/2016 15:53:10: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18907127 * 2560; EvalErrorPrediction = 0.61289063 * 2560; time = 0.9169s; samplesPerSecond = 2792.0 -MPI Rank 0: Async gradient aggregation wait time: 0.01703 -MPI Rank 0: Actual gradient aggregation time: 0.025313 -MPI Rank 0: 05/03/2016 15:53:10: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07599170 * 20480; EvalErrorPrediction = 0.57314453 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.3489s -MPI Rank 0: 05/03/2016 15:53:10: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.2' +MPI Rank 0: Actual gradient aggregation time: 0.019286 +MPI Rank 0: 05/03/2016 14:47:59: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07885124 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.9350s; samplesPerSecond = 2737.9 +MPI Rank 0: Async gradient aggregation wait time: 3e-006 +MPI Rank 0: Actual gradient aggregation time: 0.020215 +MPI Rank 0: Async gradient aggregation wait time: 3e-006 +MPI Rank 0: Actual gradient aggregation time: 0.019192 +MPI Rank 0: 05/03/2016 14:48:00: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02991602 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time 
= 0.9206s; samplesPerSecond = 2780.9 +MPI Rank 0: Async gradient aggregation wait time: 3e-006 +MPI Rank 0: Actual gradient aggregation time: 0.020185 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.019148 +MPI Rank 0: 05/03/2016 14:48:01: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18907127 * 2560; EvalErrorPrediction = 0.61289063 * 2560; time = 0.8909s; samplesPerSecond = 2873.5 +MPI Rank 0: Async gradient aggregation wait time: 0.016342 +MPI Rank 0: Actual gradient aggregation time: 0.024802 +MPI Rank 0: 05/03/2016 14:48:01: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07599170 * 20480; EvalErrorPrediction = 0.57314453 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.46123s +MPI Rank 0: 05/03/2016 14:48:01: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:48:01: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 14:48:01: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019694 +MPI Rank 0: Actual gradient aggregation time: 0.016273 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.019015 +MPI Rank 0: 05/03/2016 14:48:05: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04504148 * 9216; EvalErrorPrediction = 0.55772569 * 9216; time = 3.3794s; samplesPerSecond = 2727.1 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.018593 -MPI Rank 0: 05/03/2016 15:53:14: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04504148 * 9216; EvalErrorPrediction = 0.55772569 * 9216; time = 3.2882s; samplesPerSecond = 2802.8 +MPI Rank 0: Actual gradient aggregation time: 0.019068 MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019728 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019671 -MPI Rank 0: 05/03/2016 15:53:17: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02126122 * 10240; EvalErrorPrediction = 0.56220703 * 10240; time = 3.1244s; samplesPerSecond = 3277.5 -MPI Rank 0: 05/03/2016 15:53:17: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.02870336 * 20480; EvalErrorPrediction = 0.55952148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.45387s -MPI Rank 0: 05/03/2016 15:53:17: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.3' +MPI Rank 0: Actual gradient aggregation time: 0.016638 +MPI Rank 0: 05/03/2016 14:48:08: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02126122 * 10240; EvalErrorPrediction = 0.56220703 * 10240; time = 3.1542s; samplesPerSecond = 3246.5 +MPI Rank 0: 05/03/2016 14:48:08: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.02870336 * 20480; EvalErrorPrediction = 0.55952148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.57844s +MPI Rank 0: 05/03/2016 14:48:08: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.3' MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:17: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:48:08: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 14:48:08: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.018646 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019231 -MPI Rank 0: 05/03/2016 15:53:20: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.93105876 * 9216; EvalErrorPrediction = 0.53005642 * 9216; time = 3.2409s; samplesPerSecond = 2843.6 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019608 +MPI Rank 0: Actual gradient aggregation time: 0.017996 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.019855 -MPI Rank 0: 05/03/2016 15:53:23: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90311195 * 10240; EvalErrorPrediction = 0.51884766 * 10240; time = 3.1550s; samplesPerSecond = 3245.7 -MPI Rank 0: Async gradient aggregation wait time: 0.019372 -MPI Rank 0: 05/03/2016 15:53:23: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.91606100 * 20480; EvalErrorPrediction = 0.52392578 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.43862s -MPI Rank 0: 05/03/2016 15:53:24: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 15:53:24: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: Actual gradient aggregation time: 0.018534 +MPI Rank 0: 05/03/2016 14:48:11: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.93105876 * 9216; EvalErrorPrediction = 0.53005642 * 9216; time = 3.2560s; samplesPerSecond = 2830.4 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.329849 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.266457 +MPI Rank 0: 05/03/2016 14:48:15: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90311195 * 10240; EvalErrorPrediction = 0.51884766 * 10240; time = 3.1484s; samplesPerSecond = 3252.4 +MPI Rank 0: Async gradient aggregation wait time: 0.020891 +MPI Rank 0: 05/03/2016 14:48:15: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.91606100 * 20480; EvalErrorPrediction = 0.52392578 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.54696s +MPI Rank 0: 05/03/2016 14:48:15: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 14:48:15: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:24: Action "train" complete. +MPI Rank 0: 05/03/2016 14:48:15: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:24: __COMPLETED__ -MPI Rank 1: 05/03/2016 15:52:38: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank1 -MPI Rank 1: 05/03/2016 15:52:38: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 15:52:38: Build info: +MPI Rank 0: 05/03/2016 14:48:15: __COMPLETED__ +MPI Rank 1: 05/03/2016 14:47:28: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank1 +MPI Rank 1: 05/03/2016 14:47:28: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:47:28: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: Built time: May 3 2016 13:15:46 -MPI Rank 1: 05/03/2016 15:52:38: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 1: 05/03/2016 15:52:38: Build type: Release -MPI Rank 1: 05/03/2016 15:52:38: Build target: GPU -MPI Rank 1: 05/03/2016 15:52:38: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 15:52:38: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 05/03/2016 15:52:38: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 1: 05/03/2016 15:52:38: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 05/03/2016 15:52:38: Build Branch: HEAD -MPI Rank 1: 05/03/2016 15:52:38: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 1: 05/03/2016 15:52:38: Built by svcphil on cntk-muc01 -MPI Rank 1: 05/03/2016 15:52:38: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 05/03/2016 15:52:38: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:47:28: Built time: May 3 2016 13:15:46 +MPI Rank 1: 05/03/2016 14:47:28: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 1: 05/03/2016 14:47:28: Build type: Release +MPI Rank 1: 05/03/2016 14:47:28: Build target: GPU +MPI Rank 1: 05/03/2016 14:47:28: With 1bit-SGD: no +MPI Rank 1: 05/03/2016 14:47:28: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 1: 05/03/2016 14:47:28: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 1: 05/03/2016 14:47:28: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 1: 05/03/2016 14:47:28: Build Branch: HEAD +MPI Rank 1: 05/03/2016 14:47:28: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 1: 05/03/2016 14:47:28: Built by svcphil on cntk-muc01 +MPI Rank 1: 05/03/2016 14:47:28: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 1: 05/03/2016 14:47:28: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: Running on cntk-muc01 at 2016/05/03 15:52:38 -MPI Rank 1: 05/03/2016 15:52:38: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: 05/03/2016 14:47:28: Running on cntk-muc01 at 2016/05/03 14:47:28 +MPI Rank 1: 05/03/2016 14:47:28: Command line: +MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 15:52:38: precision = "float" +MPI Rank 1: 05/03/2016 14:47:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:47:28: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -801,14 +802,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -816,18 +815,18 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:47:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 15:52:38: precision = "float" +MPI Rank 1: 05/03/2016 14:47:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:47:28: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = -1 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -911,14 +910,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double 
MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -926,24 +923,24 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:47:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:47:28: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:deviceId=-1 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -1025,35 +1022,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] 
[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 15:52:38: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 15:52:38: Commands: speechTrain -MPI Rank 1: 05/03/2016 15:52:38: Precision = "double" -MPI Rank 1: 05/03/2016 15:52:38: Using 2 CPU threads. -MPI Rank 1: 05/03/2016 15:52:38: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 15:52:38: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 1: 05/03/2016 15:52:38: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 1: 05/03/2016 14:47:28: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:47:28: Commands: speechTrain +MPI Rank 1: 05/03/2016 14:47:28: Precision = "double" +MPI Rank 1: 05/03/2016 14:47:28: Using 2 CPU threads. +MPI Rank 1: 05/03/2016 14:47:28: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 14:47:28: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 1: 05/03/2016 14:47:28: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: ############################################################################## -MPI Rank 1: 05/03/2016 15:52:38: # # -MPI Rank 1: 05/03/2016 15:52:38: # Action "train" # -MPI Rank 1: 05/03/2016 15:52:38: # # -MPI Rank 1: 05/03/2016 15:52:38: ############################################################################## +MPI Rank 1: 05/03/2016 14:47:28: ############################################################################## +MPI Rank 1: 05/03/2016 14:47:28: # # +MPI Rank 1: 05/03/2016 14:47:28: # Action "train" # +MPI Rank 1: 05/03/2016 14:47:28: # # +MPI Rank 1: 05/03/2016 14:47:28: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 14:47:28: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 
948 entries MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: Creating virgin network. +MPI Rank 1: 05/03/2016 14:47:29: Creating virgin network. MPI Rank 1: MPI Rank 1: Post-processing network... MPI Rank 1: @@ -1105,14 +1101,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: Created model with 25 nodes on CPU. +MPI Rank 1: 05/03/2016 14:47:29: Created model with 25 nodes on CPU. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: Training criterion node(s): -MPI Rank 1: 05/03/2016 15:52:38: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 14:47:29: Training criterion node(s): +MPI Rank 1: 05/03/2016 14:47:29: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 14:47:29: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 14:47:29: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. 
@@ -1120,189 +1116,195 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 00000096F2B21A60: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 00000096F2B21CE0: {[W0 Value[512 x 363]] } -MPI Rank 1: 00000096F2B21EC0: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 00000096F2B22140: {[features Value[363 x *]] } -MPI Rank 1: 00000096FB9F0EF0: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 00000096FB9F17B0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 00000096FB9F1FD0: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 00000096FB9F2250: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 00000096FBA4FF50: {[W1 Value[512 x 512]] } -MPI Rank 1: 00000096FBA501D0: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 00000096FBA50270: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 00000096FBA50310: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 00000096FBA50450: {[B0 Value[512 x 1]] } -MPI Rank 1: 00000096FBA50630: {[W2 Value[132 x 512]] } -MPI Rank 1: 00000096FBA50770: {[LogOfPrior Value[132]] } -MPI Rank 1: 00000096FBA50810: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 00000096FBA508B0: {[W0*features Value[512 x *]] } -MPI Rank 1: 00000096FBA50950: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 00000096FBA50A90: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 00000096FBA50B30: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 00000096FBA50D10: {[labels Value[132 x *]] } -MPI Rank 1: 00000096FBA50EF0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 00000096FBA50F90: {[Prior Value[132]] } -MPI Rank 1: 00000096FBA51030: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 00000096FBA510D0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 00000096FBA51170: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 00000096FBA518F0: {[B2 Value[132 x 1]] } -MPI Rank 1: 00000096FBA51B70: {[B1 Value[512 x 1]] } +MPI Rank 1: 000000AD5ADC0410: {[W0 Value[512 x 363]] } +MPI Rank 1: 000000AD5ADC04B0: {[features Value[363 x *]] } +MPI Rank 1: 000000AD5ADC0550: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 000000AD5ADC0730: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 000000AD5ADC0AF0: {[B0 Value[512 x 1]] } +MPI Rank 1: 000000AD5ADC0D70: {[W1 Value[512 x 512]] } +MPI Rank 1: 000000AD5ADD9940: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 000000AD5ADD99E0: {[B1 Value[512 x 1]] } +MPI Rank 1: 000000AD5ADD9A80: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 000000AD5ADD9B20: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 000000AD5ADD9C60: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 000000AD5ADD9DA0: {[B2 Value[132 x 1]] } +MPI Rank 1: 000000AD5ADD9EE0: {[W0*features Value[512 x *]] } +MPI Rank 1: 000000AD5ADDA020: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 000000AD5ADDA0C0: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 000000AD5ADDA2A0: {[W2 Value[132 x 512]] } +MPI 
Rank 1: 000000AD5ADDA340: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 000000AD5ADDA3E0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 000000AD5ADDA520: {[LogOfPrior Value[132]] } +MPI Rank 1: 000000AD5ADDA660: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 000000AD5ADDA7A0: {[labels Value[132 x *]] } +MPI Rank 1: 000000AD5ADDA840: {[Prior Value[132]] } +MPI Rank 1: 000000AD5ADDA8E0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 000000AD5ADDAAC0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 000000AD5ADDAC00: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 000000AD5ADDADE0: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 000000AD5ADDAF20: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 000000AD5ADDB380: {[CrossEntropyWithSoftmax Value[1]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 14:47:29: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:38: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 15:52:38: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 15:52:38: Prior = Mean() +MPI Rank 1: 05/03/2016 14:47:29: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 14:47:29: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 14:47:29: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:40: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 14:47:30: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:40: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 14:47:31: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:52:40: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 15:52:41: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.9392s; samplesPerSecond = 681.4 -MPI Rank 1: 05/03/2016 15:52:42: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.6836s; samplesPerSecond = 936.2 -MPI Rank 1: 05/03/2016 15:52:43: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6992s; samplesPerSecond = 915.3 -MPI Rank 1: 05/03/2016 15:52:43: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6738s; samplesPerSecond = 949.9 -MPI Rank 1: 05/03/2016 15:52:44: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.7071s; samplesPerSecond = 905.2 -MPI Rank 1: 05/03/2016 15:52:45: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.7137s; samplesPerSecond = 896.8 -MPI Rank 1: 05/03/2016 15:52:45: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.6890s; samplesPerSecond = 928.9 -MPI Rank 1: 05/03/2016 15:52:46: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.7020s; samplesPerSecond = 911.6 -MPI Rank 1: 05/03/2016 15:52:47: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.6878s; samplesPerSecond = 930.5 -MPI Rank 1: 05/03/2016 15:52:47: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.7369s; samplesPerSecond = 868.5 -MPI Rank 1: 05/03/2016 15:52:48: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.6608s; samplesPerSecond = 968.5 -MPI Rank 1: 05/03/2016 15:52:49: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.6664s; samplesPerSecond = 960.3 -MPI Rank 1: 05/03/2016 15:52:49: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.6788s; samplesPerSecond = 942.9 -MPI Rank 1: 05/03/2016 15:52:50: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.6586s; samplesPerSecond = 971.8 -MPI Rank 1: 05/03/2016 15:52:51: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.7042s; samplesPerSecond = 908.8 -MPI Rank 1: 05/03/2016 15:52:52: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.7076s; samplesPerSecond = 904.4 -MPI Rank 1: 05/03/2016 15:52:52: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.6605s; samplesPerSecond = 968.9 -MPI Rank 1: 05/03/2016 15:52:53: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.7221s; samplesPerSecond = 886.3 -MPI Rank 1: 05/03/2016 15:52:54: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6824s; samplesPerSecond = 937.9 -MPI Rank 1: 05/03/2016 15:52:54: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.7190s; samplesPerSecond = 890.2 -MPI Rank 1: 05/03/2016 15:52:55: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.7137s; samplesPerSecond = 896.8 -MPI Rank 1: 05/03/2016 15:52:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6649s; samplesPerSecond = 962.5 -MPI Rank 1: 05/03/2016 15:52:56: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.6778s; samplesPerSecond = 944.3 -MPI Rank 1: 05/03/2016 15:52:57: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.6140s; samplesPerSecond = 1042.3 -MPI Rank 1: 05/03/2016 15:52:58: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.5536s; samplesPerSecond = 1156.1 -MPI Rank 1: 05/03/2016 15:52:58: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.5970s; samplesPerSecond = 1072.0 -MPI Rank 1: 05/03/2016 15:52:59: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.7008s; samplesPerSecond = 913.3 -MPI Rank 1: 05/03/2016 15:53:00: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.6970s; samplesPerSecond = 918.3 -MPI Rank 1: 05/03/2016 15:53:00: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.6742s; samplesPerSecond = 949.3 -MPI Rank 1: 05/03/2016 15:53:01: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.7073s; samplesPerSecond = 904.9 -MPI Rank 1: 05/03/2016 15:53:02: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6938s; samplesPerSecond = 922.5 -MPI Rank 1: 05/03/2016 15:53:02: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.6984s; samplesPerSecond = 916.4 -MPI Rank 1: 05/03/2016 15:53:02: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=22.1721s +MPI Rank 1: 05/03/2016 14:47:31: Starting minibatch loop. 
+MPI Rank 1: 05/03/2016 14:47:32: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.7169s; samplesPerSecond = 892.7 +MPI Rank 1: 05/03/2016 14:47:32: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.6959s; samplesPerSecond = 919.7 +MPI Rank 1: 05/03/2016 14:47:33: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.7160s; samplesPerSecond = 893.8 +MPI Rank 1: 05/03/2016 14:47:34: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.7028s; samplesPerSecond = 910.6 +MPI Rank 1: 05/03/2016 14:47:35: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6777s; samplesPerSecond = 944.4 +MPI Rank 1: 05/03/2016 14:47:35: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.6678s; samplesPerSecond = 958.4 +MPI Rank 1: 05/03/2016 14:47:36: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.6654s; samplesPerSecond = 961.8 +MPI Rank 1: 05/03/2016 14:47:37: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.6962s; samplesPerSecond = 919.3 +MPI Rank 1: 05/03/2016 14:47:37: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.6642s; samplesPerSecond = 963.5 +MPI Rank 1: 05/03/2016 14:47:38: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.6799s; samplesPerSecond = 941.3 +MPI Rank 1: 05/03/2016 14:47:39: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.6764s; samplesPerSecond = 946.2 +MPI Rank 1: 05/03/2016 14:47:39: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.6663s; samplesPerSecond = 960.6 +MPI Rank 1: 05/03/2016 14:47:40: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.7031s; samplesPerSecond = 910.2 +MPI Rank 1: 05/03/2016 14:47:41: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.7066s; samplesPerSecond = 905.7 +MPI Rank 1: 05/03/2016 14:47:41: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.6895s; samplesPerSecond = 928.2 +MPI Rank 1: 05/03/2016 14:47:42: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.7001s; samplesPerSecond = 914.2 +MPI Rank 1: 05/03/2016 14:47:43: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.6808s; samplesPerSecond = 940.1 +MPI Rank 1: 05/03/2016 14:47:43: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.6856s; samplesPerSecond = 933.4 +MPI Rank 1: 05/03/2016 14:47:44: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7015s; samplesPerSecond = 912.3 +MPI Rank 1: 05/03/2016 14:47:45: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.6702s; samplesPerSecond = 954.9 +MPI Rank 1: 05/03/2016 14:47:46: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.6931s; samplesPerSecond = 923.4 +MPI Rank 1: 05/03/2016 14:47:46: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6878s; samplesPerSecond = 930.5 +MPI Rank 1: 05/03/2016 14:47:47: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.6852s; samplesPerSecond = 934.0 +MPI Rank 1: 05/03/2016 14:47:48: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.6944s; samplesPerSecond = 921.7 +MPI Rank 1: 05/03/2016 14:47:48: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.6971s; samplesPerSecond = 918.1 +MPI Rank 1: 05/03/2016 14:47:49: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6786s; samplesPerSecond = 943.1 +MPI Rank 1: 05/03/2016 14:47:50: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.7320s; samplesPerSecond = 874.3 +MPI Rank 1: 05/03/2016 14:47:50: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.6989s; samplesPerSecond = 915.7 +MPI Rank 1: 05/03/2016 14:47:51: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.6702s; samplesPerSecond = 954.9 +MPI Rank 1: 05/03/2016 14:47:52: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6974s; samplesPerSecond = 917.7 +MPI Rank 1: 05/03/2016 14:47:52: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6724s; samplesPerSecond = 951.8 +MPI Rank 1: 05/03/2016 14:47:53: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.6912s; samplesPerSecond = 925.9 +MPI Rank 1: 05/03/2016 14:47:53: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=22.0774s MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:03: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 14:47:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 
minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:03: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.060118 +MPI Rank 1: 05/03/2016 14:47:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Actual gradient aggregation time: 0.01849 MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.125201 -MPI Rank 1: 05/03/2016 15:53:04: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14667310 * 2304; EvalErrorPrediction = 0.57595486 * 2304; time = 0.9084s; samplesPerSecond = 2536.3 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.06065 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.055167 -MPI Rank 1: 05/03/2016 15:53:05: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.09196197 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.9035s; samplesPerSecond = 2833.5 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.052859 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.068637 -MPI Rank 1: 05/03/2016 15:53:06: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07059549 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.9162s; samplesPerSecond = 2794.2 -MPI Rank 1: Async gradient aggregation wait time: 0.000294 -MPI Rank 1: Actual gradient aggregation time: 0.105561 +MPI Rank 1: Actual gradient aggregation time: 0.075755 +MPI Rank 1: 05/03/2016 14:47:55: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14667310 * 2304; EvalErrorPrediction = 0.57595486 * 2304; time = 0.9162s; samplesPerSecond = 2514.6 MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.079302 -MPI Rank 1: 05/03/2016 15:53:07: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.03657413 * 2560; EvalErrorPrediction = 0.56289062 * 2560; time = 0.8994s; samplesPerSecond = 2846.4 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.08804 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.060996 -MPI Rank 1: 05/03/2016 15:53:08: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97277932 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.9519s; samplesPerSecond = 2689.3 +MPI Rank 1: Actual gradient aggregation time: 0.086112 MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.04483 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.020723 -MPI Rank 1: 05/03/2016 15:53:08: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07885124 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.8751s; samplesPerSecond = 2925.4 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient 
aggregation time: 0.085153 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.084387 -MPI Rank 1: 05/03/2016 15:53:09: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02991602 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.9282s; samplesPerSecond = 2758.1 +MPI Rank 1: Actual gradient aggregation time: 0.068262 +MPI Rank 1: 05/03/2016 14:47:56: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.09196197 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.9314s; samplesPerSecond = 2748.5 MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.042618 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.043116 -MPI Rank 1: 05/03/2016 15:53:10: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18907127 * 2560; EvalErrorPrediction = 0.61289063 * 2560; time = 0.8975s; samplesPerSecond = 2852.4 -MPI Rank 1: Async gradient aggregation wait time: 0.048817 -MPI Rank 1: Actual gradient aggregation time: 0.021241 -MPI Rank 1: 05/03/2016 15:53:10: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07599170 * 20480; EvalErrorPrediction = 0.57314453 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.36137s -MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.242104 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.328549 -MPI Rank 1: 05/03/2016 15:53:14: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04504148 * 9216; EvalErrorPrediction = 0.55772569 * 9216; time = 3.1117s; samplesPerSecond = 2961.7 +MPI Rank 1: Actual gradient aggregation time: 0.069842 MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.248298 +MPI Rank 1: Actual gradient aggregation time: 0.093847 +MPI Rank 1: 05/03/2016 14:47:57: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07059549 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.9166s; samplesPerSecond = 2793.1 +MPI Rank 1: Async gradient aggregation wait time: 0.058287 +MPI Rank 1: Actual gradient aggregation time: 0.041421 MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.246785 -MPI Rank 1: 05/03/2016 15:53:17: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02126122 * 10240; EvalErrorPrediction = 0.56220703 * 10240; time = 3.1739s; samplesPerSecond = 3226.3 -MPI Rank 1: 05/03/2016 15:53:17: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.02870336 * 20480; EvalErrorPrediction = 0.55952148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.45317s +MPI Rank 1: Actual gradient aggregation time: 0.043238 +MPI Rank 1: 05/03/2016 14:47:58: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.03657413 * 2560; EvalErrorPrediction = 0.56289062 * 2560; time = 0.9425s; samplesPerSecond = 2716.1 +MPI Rank 1: Async gradient aggregation wait time: 2e-006 +MPI Rank 1: Actual gradient aggregation time: 0.047332 +MPI Rank 1: Async gradient aggregation wait time: 2e-006 +MPI Rank 1: Actual gradient aggregation time: 0.111392 +MPI Rank 1: 05/03/2016 14:47:58: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97277932 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.9341s; samplesPerSecond = 2740.5 +MPI Rank 1: Async gradient aggregation wait time: 2e-006 +MPI Rank 1: Actual gradient aggregation time: 0.060099 +MPI Rank 1: Async gradient aggregation wait time: 2e-006 +MPI Rank 1: Actual gradient aggregation time: 0.04386 +MPI Rank 1: 05/03/2016 14:47:59: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07885124 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.9025s; samplesPerSecond = 2836.5 +MPI Rank 1: Async gradient aggregation wait time: 3e-006 +MPI Rank 1: Actual gradient aggregation time: 0.089769 +MPI Rank 1: Async gradient aggregation wait time: 3e-006 +MPI Rank 1: Actual gradient aggregation time: 0.075457 +MPI Rank 1: 05/03/2016 14:48:00: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02991602 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.9221s; samplesPerSecond = 2776.3 +MPI Rank 1: Async gradient aggregation wait time: 3e-006 +MPI Rank 1: Actual gradient aggregation time: 0.071563 +MPI Rank 1: Async gradient aggregation wait time: 2e-006 +MPI Rank 1: Actual gradient aggregation time: 0.053891 +MPI Rank 1: 05/03/2016 14:48:01: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18907127 * 2560; EvalErrorPrediction = 0.61289063 * 2560; time = 0.9161s; samplesPerSecond = 2794.5 +MPI Rank 1: Async 
gradient aggregation wait time: 0.059511 +MPI Rank 1: Actual gradient aggregation time: 0.019543 +MPI Rank 1: 05/03/2016 14:48:01: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07599170 * 20480; EvalErrorPrediction = 0.57314453 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.47257s MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:17: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 14:48:01: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 05/03/2016 14:48:01: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.123451 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.142239 -MPI Rank 1: 05/03/2016 15:53:20: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.93105876 * 9216; EvalErrorPrediction = 0.53005642 * 9216; time = 3.1637s; samplesPerSecond = 2913.1 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.13685 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.180515 -MPI Rank 1: 05/03/2016 15:53:23: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90311195 * 10240; EvalErrorPrediction = 0.51884766 * 10240; time = 3.1063s; samplesPerSecond = 3296.6 -MPI Rank 1: Async gradient aggregation wait time: 0.019381 -MPI Rank 1: 05/03/2016 15:53:23: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.91606100 * 20480; EvalErrorPrediction = 0.52392578 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.43634s -MPI Rank 1: 05/03/2016 15:53:24: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: Actual gradient aggregation time: 0.349924 +MPI Rank 1: Async gradient aggregation wait time: 3e-006 +MPI Rank 1: Actual gradient aggregation time: 0.152649 +MPI Rank 1: 05/03/2016 14:48:05: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04504148 * 9216; EvalErrorPrediction = 0.55772569 * 9216; time = 3.2369s; samplesPerSecond = 2847.2 +MPI Rank 1: Async gradient aggregation wait time: 0.094425 +MPI Rank 1: Actual gradient aggregation time: 0.165917 +MPI Rank 1: Async gradient aggregation wait time: 3e-006 +MPI Rank 1: Actual gradient aggregation time: 0.168035 +MPI Rank 1: 05/03/2016 14:48:08: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02126122 * 10240; EvalErrorPrediction = 0.56220703 * 10240; time = 3.2102s; samplesPerSecond = 3189.8 +MPI Rank 1: 05/03/2016 14:48:08: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.02870336 * 20480; EvalErrorPrediction = 0.55952148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.57689s MPI Rank 1: -MPI Rank 1: 
05/03/2016 15:53:24: Action "train" complete. +MPI Rank 1: 05/03/2016 14:48:08: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:24: __COMPLETED__ -MPI Rank 2: 05/03/2016 15:52:38: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank2 -MPI Rank 2: 05/03/2016 15:52:38: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 15:52:38: Build info: +MPI Rank 1: 05/03/2016 14:48:08: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Async gradient aggregation wait time: 3e-006 +MPI Rank 1: Actual gradient aggregation time: 0.113415 +MPI Rank 1: Async gradient aggregation wait time: 3e-006 +MPI Rank 1: Actual gradient aggregation time: 0.112601 +MPI Rank 1: 05/03/2016 14:48:11: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.93105876 * 9216; EvalErrorPrediction = 0.53005642 * 9216; time = 3.3457s; samplesPerSecond = 2754.6 +MPI Rank 1: Async gradient aggregation wait time: 2e-006 +MPI Rank 1: Actual gradient aggregation time: 0.018087 +MPI Rank 1: Async gradient aggregation wait time: 2e-006 +MPI Rank 1: Actual gradient aggregation time: 0.020186 +MPI Rank 1: 05/03/2016 14:48:15: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90311195 * 10240; EvalErrorPrediction = 0.51884766 * 10240; time = 3.1544s; samplesPerSecond = 3246.3 +MPI Rank 1: Async gradient aggregation wait time: 0.023136 +MPI Rank 1: 05/03/2016 14:48:15: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.91606100 * 20480; EvalErrorPrediction = 0.52392578 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.54741s +MPI Rank 1: 05/03/2016 14:48:15: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: +MPI Rank 1: 05/03/2016 14:48:15: Action "train" complete. 
+MPI Rank 1: +MPI Rank 1: 05/03/2016 14:48:15: __COMPLETED__ +MPI Rank 2: 05/03/2016 14:47:29: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank2 +MPI Rank 2: 05/03/2016 14:47:29: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:47:29: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:38: Built time: May 3 2016 13:15:46 -MPI Rank 2: 05/03/2016 15:52:38: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 2: 05/03/2016 15:52:38: Build type: Release -MPI Rank 2: 05/03/2016 15:52:38: Build target: GPU -MPI Rank 2: 05/03/2016 15:52:38: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 15:52:38: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 05/03/2016 15:52:38: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 2: 05/03/2016 15:52:38: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 05/03/2016 15:52:38: Build Branch: HEAD -MPI Rank 2: 05/03/2016 15:52:38: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 2: 05/03/2016 15:52:38: Built by svcphil on cntk-muc01 -MPI Rank 2: 05/03/2016 15:52:38: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 05/03/2016 15:52:38: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:47:29: Built time: May 3 2016 13:15:46 +MPI Rank 2: 05/03/2016 14:47:29: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 2: 05/03/2016 14:47:29: Build type: Release +MPI Rank 2: 05/03/2016 14:47:29: Build target: GPU +MPI Rank 2: 05/03/2016 14:47:29: With 1bit-SGD: no +MPI Rank 2: 05/03/2016 14:47:29: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 2: 05/03/2016 14:47:29: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 2: 05/03/2016 14:47:29: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 2: 05/03/2016 14:47:29: Build Branch: HEAD +MPI Rank 2: 05/03/2016 14:47:29: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 2: 05/03/2016 14:47:29: Built by svcphil on cntk-muc01 +MPI Rank 2: 05/03/2016 14:47:29: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 2: 05/03/2016 14:47:29: ------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:38: Running on cntk-muc01 at 2016/05/03 15:52:38 -MPI Rank 2: 05/03/2016 15:52:38: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] 
speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: 05/03/2016 14:47:29: Running on cntk-muc01 at 2016/05/03 14:47:29 +MPI Rank 2: 05/03/2016 14:47:29: Command line: +MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 15:52:38: precision = "float" +MPI Rank 2: 05/03/2016 14:47:29: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:47:29: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1392,14 +1394,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -1407,18 +1407,18 @@ MPI Rank 2: 
speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:47:29: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 15:52:38: precision = "float" +MPI Rank 2: 05/03/2016 14:47:29: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:47:29: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = -1 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1502,14 +1502,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -1517,24 +1515,24 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: 
stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:47:29: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:38: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:47:29: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:deviceId=-1 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1616,35 +1614,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 2: 
-MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 15:52:38: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 15:52:38: Commands: speechTrain -MPI Rank 2: 05/03/2016 15:52:38: Precision = "double" -MPI Rank 2: 05/03/2016 15:52:38: Using 2 CPU threads. -MPI Rank 2: 05/03/2016 15:52:38: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 15:52:38: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 2: 05/03/2016 15:52:38: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 2: 05/03/2016 14:47:29: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:47:29: Commands: speechTrain +MPI Rank 2: 05/03/2016 14:47:29: Precision = "double" +MPI Rank 2: 05/03/2016 14:47:29: Using 2 CPU threads. +MPI Rank 2: 05/03/2016 14:47:29: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 14:47:29: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 2: 05/03/2016 14:47:29: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:38: ############################################################################## -MPI Rank 2: 05/03/2016 15:52:38: # # -MPI Rank 2: 05/03/2016 15:52:38: # Action "train" # -MPI Rank 2: 05/03/2016 15:52:38: # # -MPI Rank 2: 05/03/2016 15:52:38: ############################################################################## +MPI Rank 2: 05/03/2016 14:47:29: ############################################################################## +MPI Rank 2: 05/03/2016 14:47:29: # # +MPI Rank 2: 05/03/2016 14:47:29: # Action "train" # +MPI Rank 2: 05/03/2016 14:47:29: # # +MPI Rank 2: 05/03/2016 14:47:29: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:38: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 14:47:29: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... 
total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:39: Creating virgin network. +MPI Rank 2: 05/03/2016 14:47:29: Creating virgin network. MPI Rank 2: MPI Rank 2: Post-processing network... MPI Rank 2: @@ -1696,14 +1693,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:39: Created model with 25 nodes on CPU. +MPI Rank 2: 05/03/2016 14:47:29: Created model with 25 nodes on CPU. MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:39: Training criterion node(s): -MPI Rank 2: 05/03/2016 15:52:39: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 14:47:29: Training criterion node(s): +MPI Rank 2: 05/03/2016 14:47:29: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:39: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 14:47:29: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:39: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 14:47:29: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1711,160 +1708,166 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0000000A2F997B00: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 0000000A2F997CE0: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 0000000A2F9985A0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 0000000A2F998AA0: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 0000000A2F99E8F0: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 0000000A2F99E990: {[W0 Value[512 x 363]] } -MPI Rank 2: 0000000A2F99EA30: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 0000000A2F99EE90: {[features Value[363 x *]] } -MPI Rank 2: 0000000A388FCC80: {[LogOfPrior Value[132]] } -MPI Rank 2: 0000000A388FCE60: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 0000000A388FCF00: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 0000000A388FCFA0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 0000000A388FD040: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 0000000A388FD180: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 0000000A388FD360: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 0000000A388FD5E0: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 0000000A388FD860: {[W2 Value[132 x 512]] } -MPI Rank 2: 0000000A388FDAE0: 
{[EvalErrorPrediction Value[1]] } -MPI Rank 2: 0000000A388FDB80: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 0000000A388FDC20: {[W0*features Value[512 x *]] } -MPI Rank 2: 0000000A388FDD60: {[Prior Value[132]] } -MPI Rank 2: 0000000A388FDEA0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 0000000A388FE1C0: {[labels Value[132 x *]] } -MPI Rank 2: 0000000A388FE440: {[B0 Value[512 x 1]] } -MPI Rank 2: 0000000A388FE940: {[B2 Value[132 x 1]] } -MPI Rank 2: 0000000A388FE9E0: {[B1 Value[512 x 1]] } -MPI Rank 2: 0000000A388FEA80: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 0000000A388FEB20: {[W1 Value[512 x 512]] } +MPI Rank 2: 000000CD4D10EFF0: {[features Value[363 x *]] } +MPI Rank 2: 000000CD4D10F310: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 000000CD4D10F450: {[W0 Value[512 x 363]] } +MPI Rank 2: 000000CD4D10F770: {[B0 Value[512 x 1]] } +MPI Rank 2: 000000CD4D10FC70: {[W1 Value[512 x 512]] } +MPI Rank 2: 000000CD4D10FEF0: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 000000CD4D144AB0: {[B1 Value[512 x 1]] } +MPI Rank 2: 000000CD4D144B50: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 000000CD4D144C90: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 000000CD4D144DD0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 000000CD4D144F10: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 000000CD4D1450F0: {[W2 Value[132 x 512]] } +MPI Rank 2: 000000CD4D1452D0: {[Prior Value[132]] } +MPI Rank 2: 000000CD4D145550: {[LogOfPrior Value[132]] } +MPI Rank 2: 000000CD4D1455F0: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 000000CD4D145690: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 000000CD4D1457D0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 000000CD4D145A50: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 000000CD4D145B90: {[labels Value[132 x *]] } +MPI Rank 2: 000000CD4D145D70: {[W0*features Value[512 x *]] } +MPI Rank 2: 000000CD4D145E10: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 000000CD4D146130: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 000000CD4D1461D0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 000000CD4D1464F0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 000000CD4D146590: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 000000CD4D146630: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 000000CD4D1466D0: {[B2 Value[132 x 1]] } +MPI Rank 2: 000000CD4D146770: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:39: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 14:47:29: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:39: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 15:52:39: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 15:52:39: Prior = Mean() +MPI Rank 2: 05/03/2016 14:47:29: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 14:47:29: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 14:47:29: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:40: Precomputing --> Completed. 
+MPI Rank 2: 05/03/2016 14:47:31: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:40: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 14:47:31: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:52:40: Starting minibatch loop. -MPI Rank 2: 05/03/2016 15:52:41: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.9659s; samplesPerSecond = 662.6 -MPI Rank 2: 05/03/2016 15:52:42: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.6751s; samplesPerSecond = 948.0 -MPI Rank 2: 05/03/2016 15:52:43: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6949s; samplesPerSecond = 920.9 -MPI Rank 2: 05/03/2016 15:52:43: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6988s; samplesPerSecond = 915.8 -MPI Rank 2: 05/03/2016 15:52:44: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6820s; samplesPerSecond = 938.4 -MPI Rank 2: 05/03/2016 15:52:45: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.6894s; samplesPerSecond = 928.3 -MPI Rank 2: 05/03/2016 15:52:45: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.7033s; samplesPerSecond = 910.0 -MPI Rank 2: 05/03/2016 15:52:46: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.6667s; samplesPerSecond = 959.9 -MPI Rank 2: 05/03/2016 15:52:47: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.6854s; samplesPerSecond = 933.8 -MPI Rank 2: 05/03/2016 15:52:47: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.6799s; samplesPerSecond = 941.3 -MPI Rank 2: 05/03/2016 15:52:48: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.7277s; samplesPerSecond = 879.4 -MPI Rank 2: 05/03/2016 15:52:49: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.7281s; samplesPerSecond = 879.0 -MPI Rank 2: 05/03/2016 15:52:50: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.7072s; samplesPerSecond = 905.0 -MPI Rank 2: 05/03/2016 15:52:50: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.7087s; samplesPerSecond = 903.0 -MPI Rank 2: 05/03/2016 15:52:51: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 
640; EvalErrorPrediction = 0.72968750 * 640; time = 0.6742s; samplesPerSecond = 949.3 -MPI Rank 2: 05/03/2016 15:52:52: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.6817s; samplesPerSecond = 938.9 -MPI Rank 2: 05/03/2016 15:52:52: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.6541s; samplesPerSecond = 978.4 -MPI Rank 2: 05/03/2016 15:52:53: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.6818s; samplesPerSecond = 938.6 -MPI Rank 2: 05/03/2016 15:52:54: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6824s; samplesPerSecond = 937.9 -MPI Rank 2: 05/03/2016 15:52:54: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.6541s; samplesPerSecond = 978.5 -MPI Rank 2: 05/03/2016 15:52:55: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.6756s; samplesPerSecond = 947.4 -MPI Rank 2: 05/03/2016 15:52:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7151s; samplesPerSecond = 895.0 -MPI Rank 2: 05/03/2016 15:52:56: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.6595s; samplesPerSecond = 970.4 -MPI Rank 2: 05/03/2016 15:52:57: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.7585s; samplesPerSecond = 843.7 -MPI Rank 2: 05/03/2016 15:52:58: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.8378s; samplesPerSecond = 763.9 -MPI Rank 2: 05/03/2016 15:52:59: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6848s; samplesPerSecond = 934.6 -MPI Rank 2: 05/03/2016 15:52:59: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.6835s; samplesPerSecond = 936.4 -MPI Rank 2: 05/03/2016 15:53:00: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.6888s; samplesPerSecond = 929.1 -MPI Rank 2: 05/03/2016 15:53:01: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.6450s; samplesPerSecond = 992.3 -MPI Rank 2: 05/03/2016 15:53:01: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6625s; samplesPerSecond = 966.0 -MPI Rank 2: 05/03/2016 15:53:02: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6351s; samplesPerSecond = 1007.7 -MPI Rank 2: 05/03/2016 15:53:03: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.5737s; samplesPerSecond = 
1115.5 -MPI Rank 2: 05/03/2016 15:53:03: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=22.3499s +MPI Rank 2: 05/03/2016 14:47:31: Starting minibatch loop. +MPI Rank 2: 05/03/2016 14:47:32: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.6650s; samplesPerSecond = 962.4 +MPI Rank 2: 05/03/2016 14:47:32: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.7074s; samplesPerSecond = 904.7 +MPI Rank 2: 05/03/2016 14:47:33: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.7266s; samplesPerSecond = 880.8 +MPI Rank 2: 05/03/2016 14:47:34: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.7170s; samplesPerSecond = 892.6 +MPI Rank 2: 05/03/2016 14:47:35: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.7299s; samplesPerSecond = 876.9 +MPI Rank 2: 05/03/2016 14:47:35: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.7227s; samplesPerSecond = 885.5 +MPI Rank 2: 05/03/2016 14:47:36: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.7302s; samplesPerSecond = 876.5 +MPI Rank 2: 05/03/2016 14:47:37: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.7148s; samplesPerSecond = 895.3 +MPI Rank 2: 05/03/2016 14:47:38: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.7290s; samplesPerSecond = 877.9 +MPI Rank 2: 05/03/2016 14:47:38: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.7239s; samplesPerSecond = 884.1 +MPI Rank 2: 05/03/2016 14:47:39: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.7129s; samplesPerSecond = 897.8 +MPI Rank 2: 05/03/2016 14:47:40: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.7239s; samplesPerSecond = 884.1 +MPI Rank 2: 05/03/2016 14:47:40: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.6971s; samplesPerSecond = 918.1 +MPI Rank 2: 05/03/2016 14:47:41: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.7085s; samplesPerSecond = 903.3 +MPI Rank 2: 05/03/2016 14:47:42: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.6882s; samplesPerSecond = 929.9 +MPI Rank 2: 05/03/2016 14:47:42: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.7096s; samplesPerSecond = 901.9 +MPI Rank 
2: 05/03/2016 14:47:43: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.7228s; samplesPerSecond = 885.4 +MPI Rank 2: 05/03/2016 14:47:44: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.7069s; samplesPerSecond = 905.4 +MPI Rank 2: 05/03/2016 14:47:45: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7157s; samplesPerSecond = 894.2 +MPI Rank 2: 05/03/2016 14:47:45: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.7240s; samplesPerSecond = 884.0 +MPI Rank 2: 05/03/2016 14:47:46: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.7113s; samplesPerSecond = 899.8 +MPI Rank 2: 05/03/2016 14:47:47: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7195s; samplesPerSecond = 889.5 +MPI Rank 2: 05/03/2016 14:47:48: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.7128s; samplesPerSecond = 897.9 +MPI Rank 2: 05/03/2016 14:47:48: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.7085s; samplesPerSecond = 903.4 +MPI Rank 2: 05/03/2016 14:47:49: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.7183s; samplesPerSecond = 891.0 +MPI Rank 2: 05/03/2016 14:47:50: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6740s; samplesPerSecond = 949.6 +MPI Rank 2: 05/03/2016 14:47:50: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.7063s; samplesPerSecond = 906.1 +MPI Rank 2: 05/03/2016 14:47:51: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.7220s; samplesPerSecond = 886.4 +MPI Rank 2: 05/03/2016 14:47:52: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.7104s; samplesPerSecond = 901.0 +MPI Rank 2: 05/03/2016 14:47:52: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.7417s; samplesPerSecond = 862.8 +MPI Rank 2: 05/03/2016 14:47:53: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.7054s; samplesPerSecond = 907.3 +MPI Rank 2: 05/03/2016 14:47:54: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.4783s; samplesPerSecond = 1338.0 +MPI Rank 2: 05/03/2016 14:47:54: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=22.6007s MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:03: Starting Epoch 
2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 14:47:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:03: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.117076 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.104024 -MPI Rank 2: 05/03/2016 15:53:04: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14667310 * 2304; EvalErrorPrediction = 0.57595486 * 2304; time = 0.8745s; samplesPerSecond = 2634.8 +MPI Rank 2: 05/03/2016 14:47:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Actual gradient aggregation time: 0.103126 +MPI Rank 2: Async gradient aggregation wait time: 0.044859 +MPI Rank 2: Actual gradient aggregation time: 0.09176 +MPI Rank 2: 05/03/2016 14:47:55: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14667310 * 2304; EvalErrorPrediction = 0.57595486 * 2304; time = 0.8821s; samplesPerSecond = 2612.0 +MPI Rank 2: Async gradient aggregation wait time: 0.057055 +MPI Rank 2: Actual gradient aggregation time: 0.091031 +MPI Rank 2: Async gradient aggregation wait time: 0.008171 +MPI Rank 2: Actual gradient aggregation time: 0.095744 +MPI Rank 2: 05/03/2016 14:47:56: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.09196197 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.9368s; samplesPerSecond = 2732.8 MPI Rank 2: Async gradient aggregation wait time: 1e-006 -MPI Rank 2: Actual gradient aggregation time: 0.101944 -MPI Rank 2: Async gradient aggregation wait time: 0.002112 -MPI Rank 2: Actual gradient aggregation time: 0.102174 -MPI Rank 2: 05/03/2016 15:53:05: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.09196197 * 2560; EvalErrorPrediction = 0.57539063 * 2560; time = 0.9177s; samplesPerSecond = 2789.6 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.078845 -MPI Rank 2: Async gradient aggregation wait time: 0.042063 -MPI Rank 2: Actual gradient aggregation time: 0.085881 -MPI Rank 2: 05/03/2016 15:53:06: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07059549 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.9054s; samplesPerSecond = 2827.5 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.091782 -MPI Rank 2: Async gradient aggregation wait time: 0.003406 -MPI Rank 2: Actual gradient aggregation time: 0.104167 -MPI Rank 2: 05/03/2016 15:53:07: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.03657413 * 2560; EvalErrorPrediction = 0.56289062 * 2560; time = 0.9223s; samplesPerSecond = 2775.5 -MPI Rank 2: Async gradient aggregation wait time: 0.050012 -MPI Rank 2: Actual gradient aggregation time: 0.092725 -MPI Rank 2: Async gradient aggregation wait time: 0.000834 -MPI Rank 2: Actual gradient aggregation time: 
0.086635 -MPI Rank 2: 05/03/2016 15:53:08: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97277932 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.8900s; samplesPerSecond = 2876.4 -MPI Rank 2: Async gradient aggregation wait time: 0.007003 -MPI Rank 2: Actual gradient aggregation time: 0.094868 -MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.081051 -MPI Rank 2: 05/03/2016 15:53:08: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07885124 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.9191s; samplesPerSecond = 2785.3 -MPI Rank 2: Async gradient aggregation wait time: 0.000301 -MPI Rank 2: Actual gradient aggregation time: 0.111102 +MPI Rank 2: Actual gradient aggregation time: 0.108455 +MPI Rank 2: Async gradient aggregation wait time: 0.055224 +MPI Rank 2: Actual gradient aggregation time: 0.096521 +MPI Rank 2: 05/03/2016 14:47:57: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07059549 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.9245s; samplesPerSecond = 2769.0 +MPI Rank 2: Async gradient aggregation wait time: 0.040324 +MPI Rank 2: Actual gradient aggregation time: 0.043825 +MPI Rank 2: Async gradient aggregation wait time: 0.000433 +MPI Rank 2: Actual gradient aggregation time: 0.102422 +MPI Rank 2: 05/03/2016 14:47:58: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.03657413 * 2560; EvalErrorPrediction = 0.56289062 * 2560; time = 0.9115s; samplesPerSecond = 2808.6 +MPI Rank 2: Async gradient aggregation wait time: 0.015514 +MPI Rank 2: Actual gradient aggregation time: 0.086124 +MPI Rank 2: Async gradient aggregation wait time: 0.001398 +MPI Rank 2: Actual gradient aggregation time: 0.144902 +MPI Rank 2: 05/03/2016 14:47:58: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97277932 * 2560; EvalErrorPrediction = 0.55234375 * 2560; time = 0.9341s; samplesPerSecond = 2740.5 +MPI Rank 2: Async gradient aggregation wait time: 0.038547 +MPI Rank 2: Actual gradient aggregation time: 0.088037 MPI Rank 2: Async gradient aggregation wait time: 1e-006 -MPI Rank 2: Actual gradient aggregation time: 0.108252 -MPI Rank 2: 05/03/2016 15:53:09: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02991602 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.9144s; samplesPerSecond = 2799.7 +MPI Rank 2: Actual gradient aggregation time: 0.108385 +MPI Rank 2: 05/03/2016 14:47:59: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07885124 * 2560; EvalErrorPrediction = 0.57656250 * 2560; time = 0.9483s; samplesPerSecond = 2699.5 +MPI Rank 2: Async gradient aggregation wait time: 0.05439 +MPI Rank 2: Actual gradient aggregation time: 0.093369 +MPI Rank 2: Async gradient aggregation wait time: 0.041305 +MPI Rank 2: Actual gradient aggregation time: 0.094418 +MPI Rank 2: 05/03/2016 14:48:00: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02991602 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.9202s; samplesPerSecond = 2782.1 +MPI Rank 2: Async gradient aggregation wait time: 0.044904 +MPI Rank 2: Actual gradient aggregation time: 0.078154 MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.070719 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.107726 -MPI Rank 2: 05/03/2016 15:53:10: Epoch[ 2 of 4]-Minibatch[ 71- 
80, 100.00%]: CrossEntropyWithSoftmax = 2.18907127 * 2560; EvalErrorPrediction = 0.61289063 * 2560; time = 0.9080s; samplesPerSecond = 2819.5 -MPI Rank 2: Async gradient aggregation wait time: 0.07071 -MPI Rank 2: Actual gradient aggregation time: 0.023771 -MPI Rank 2: 05/03/2016 15:53:10: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07599170 * 20480; EvalErrorPrediction = 0.57314453 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.35537s +MPI Rank 2: Actual gradient aggregation time: 0.091906 +MPI Rank 2: 05/03/2016 14:48:01: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18907127 * 2560; EvalErrorPrediction = 0.61289063 * 2560; time = 0.9169s; samplesPerSecond = 2792.1 +MPI Rank 2: Async gradient aggregation wait time: 0.061138 +MPI Rank 2: Actual gradient aggregation time: 0.021889 +MPI Rank 2: 05/03/2016 14:48:01: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07599170 * 20480; EvalErrorPrediction = 0.57314453 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.46613s MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:48:01: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.11404 -MPI Rank 2: Actual gradient aggregation time: 0.347529 -MPI Rank 2: Async gradient aggregation wait time: 0.093875 -MPI Rank 2: Actual gradient aggregation time: 0.377905 -MPI Rank 2: 05/03/2016 15:53:14: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04504148 * 9216; EvalErrorPrediction = 0.55772569 * 9216; time = 3.1291s; samplesPerSecond = 2945.2 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.337266 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.329862 -MPI Rank 2: 05/03/2016 15:53:17: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02126122 * 10240; EvalErrorPrediction = 0.56220703 * 10240; time = 3.1541s; samplesPerSecond = 3246.6 -MPI Rank 2: 05/03/2016 15:53:17: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.02870336 * 20480; EvalErrorPrediction = 0.55952148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.44995s +MPI Rank 2: 05/03/2016 14:48:01: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 2: Async gradient aggregation wait time: 0.243362 +MPI Rank 2: Actual gradient aggregation time: 0.404255 +MPI Rank 2: Async gradient aggregation wait time: 0.090539 +MPI Rank 2: Actual gradient aggregation time: 0.302721 +MPI Rank 2: 05/03/2016 14:48:04: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04504148 * 9216; EvalErrorPrediction = 0.55772569 * 9216; time = 3.0536s; samplesPerSecond = 3018.1 +MPI Rank 2: Async gradient aggregation wait time: 0.17987 +MPI Rank 2: Actual gradient aggregation time: 0.169592 +MPI Rank 2: Async gradient aggregation wait time: 0.08658 +MPI Rank 2: Actual gradient aggregation time: 0.325824 +MPI Rank 2: 05/03/2016 14:48:08: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02126122 * 10240; EvalErrorPrediction = 0.56220703 * 10240; time = 3.2583s; samplesPerSecond = 3142.8 +MPI Rank 2: 05/03/2016 14:48:08: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.02870336 * 20480; EvalErrorPrediction = 0.55952148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.56755s MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:17: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:48:08: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 05/03/2016 14:48:08: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.220761 -MPI Rank 2: Async gradient aggregation wait time: 0.087443 -MPI Rank 2: Actual gradient aggregation time: 0.316021 -MPI Rank 2: 05/03/2016 15:53:20: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.93105876 * 9216; EvalErrorPrediction = 0.53005642 * 9216; time = 2.9840s; samplesPerSecond = 3088.5 -MPI Rank 2: Async gradient aggregation wait time: 0.099892 -MPI Rank 2: Actual gradient aggregation time: 0.270221 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.257474 -MPI Rank 2: 05/03/2016 15:53:23: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90311195 * 10240; EvalErrorPrediction = 0.51884766 * 10240; time = 3.2775s; samplesPerSecond = 3124.4 -MPI Rank 2: Async gradient aggregation wait time: 0.019633 -MPI Rank 2: 05/03/2016 15:53:24: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.91606100 * 20480; EvalErrorPrediction = 0.52392578 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.43308s -MPI Rank 2: 05/03/2016 15:53:24: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: Actual gradient aggregation time: 0.252425 +MPI Rank 2: Async gradient aggregation wait time: 0.160351 +MPI Rank 2: Actual gradient aggregation time: 0.280032 +MPI Rank 2: 05/03/2016 14:48:11: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.93105876 * 9216; EvalErrorPrediction = 0.53005642 * 9216; time = 3.0157s; samplesPerSecond = 3056.0 +MPI Rank 2: Async gradient aggregation wait time: 0.118586 +MPI Rank 2: Actual gradient aggregation time: 0.392845 +MPI Rank 2: Async gradient aggregation wait time: 0.093972 +MPI Rank 2: Actual gradient aggregation time: 0.322274 +MPI Rank 2: 05/03/2016 14:48:14: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.90311195 * 10240; EvalErrorPrediction = 0.51884766 * 10240; time = 3.3191s; samplesPerSecond = 3085.2 +MPI Rank 2: Async gradient aggregation wait time: 0.017806 +MPI Rank 2: 05/03/2016 14:48:15: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.91606100 * 20480; EvalErrorPrediction = 0.52392578 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.53811s +MPI Rank 2: 05/03/2016 14:48:15: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:24: Action "train" complete. +MPI Rank 2: 05/03/2016 14:48:15: Action "train" complete. 
MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:24: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 14:48:15: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt index 001073cfc..01852c18b 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt @@ -1,4 +1,4 @@ -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr ------------------------------------------------------------------- Build info: @@ -68,9 +68,9 
@@ ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded ping [requestnodes (after change)]: all 3 nodes responded ping [requestnodes (after change)]: all 3 nodes responded +mpihelper: we are cog 2 in a gearbox of 3 mpihelper: we are cog 1 in a gearbox of 3 mpihelper: we are cog 0 in a gearbox of 3 -mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: 3 nodes pinging each other @@ -94,32 +94,32 @@ C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe ended prematurely C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe ended prematurely and may have crashed. exit code 0 ---- error analysis ----- -MPI Rank 0: 05/03/2016 15:53:26: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank0 -MPI Rank 0: 05/03/2016 15:53:26: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 15:53:26: Build info: +MPI Rank 0: 05/03/2016 14:48:17: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank0 +MPI Rank 0: 05/03/2016 14:48:17: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:48:17: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:26: Built time: May 3 2016 13:15:46 -MPI Rank 0: 05/03/2016 15:53:26: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 0: 05/03/2016 15:53:26: Build type: Release -MPI Rank 0: 05/03/2016 15:53:26: Build target: GPU -MPI Rank 0: 05/03/2016 15:53:26: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 15:53:26: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 05/03/2016 15:53:26: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 0: 05/03/2016 15:53:26: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 05/03/2016 15:53:26: Build Branch: HEAD -MPI Rank 0: 05/03/2016 15:53:26: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 0: 05/03/2016 15:53:26: Built by svcphil on cntk-muc01 -MPI Rank 0: 05/03/2016 15:53:26: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 05/03/2016 15:53:26: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:48:17: Built time: May 3 2016 13:15:46 +MPI Rank 0: 05/03/2016 14:48:17: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 0: 05/03/2016 14:48:17: Build type: Release +MPI Rank 0: 05/03/2016 14:48:17: Build target: GPU +MPI Rank 0: 05/03/2016 14:48:17: With 1bit-SGD: no +MPI Rank 0: 05/03/2016 14:48:17: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 0: 05/03/2016 14:48:17: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 0: 05/03/2016 14:48:17: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 0: 05/03/2016 14:48:17: Build Branch: HEAD +MPI Rank 0: 05/03/2016 14:48:17: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 0: 05/03/2016 14:48:17: Built by svcphil on cntk-muc01 +MPI Rank 0: 05/03/2016 14:48:17: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 0: 05/03/2016 14:48:17: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:26: Running on cntk-muc01 at 2016/05/03 15:53:26 -MPI Rank 0: 
05/03/2016 15:53:26: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: 05/03/2016 14:48:17: Running on cntk-muc01 at 2016/05/03 14:48:17 +MPI Rank 0: 05/03/2016 14:48:17: Command line: +MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:26: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 15:53:26: precision = "float" +MPI Rank 0: 05/03/2016 14:48:17: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:48:17: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -209,14 +209,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: 
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -224,18 +222,18 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:26: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:48:17: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:26: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 15:53:26: precision = "float" +MPI Rank 0: 05/03/2016 14:48:17: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:48:17: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = 0 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -319,14 +317,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: 
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -334,24 +330,24 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:26: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:48:17: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:26: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:48:17: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain MPI Rank 0: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:deviceId=0 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = 
"C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -433,35 +429,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 15:53:26: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 15:53:26: Commands: speechTrain -MPI Rank 0: 05/03/2016 15:53:26: Precision = "double" -MPI Rank 0: 05/03/2016 15:53:26: Using 2 CPU threads. -MPI Rank 0: 05/03/2016 15:53:26: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 15:53:26: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 0: 05/03/2016 15:53:26: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 0: 05/03/2016 14:48:17: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:48:17: Commands: speechTrain +MPI Rank 0: 05/03/2016 14:48:17: Precision = "double" +MPI Rank 0: 05/03/2016 14:48:17: Using 2 CPU threads. 
+MPI Rank 0: 05/03/2016 14:48:17: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 14:48:17: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 0: 05/03/2016 14:48:17: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:26: ############################################################################## -MPI Rank 0: 05/03/2016 15:53:26: # # -MPI Rank 0: 05/03/2016 15:53:26: # Action "train" # -MPI Rank 0: 05/03/2016 15:53:26: # # -MPI Rank 0: 05/03/2016 15:53:26: ############################################################################## +MPI Rank 0: 05/03/2016 14:48:17: ############################################################################## +MPI Rank 0: 05/03/2016 14:48:17: # # +MPI Rank 0: 05/03/2016 14:48:17: # Action "train" # +MPI Rank 0: 05/03/2016 14:48:17: # # +MPI Rank 0: 05/03/2016 14:48:17: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:26: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 14:48:17: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:26: Creating virgin network. +MPI Rank 0: 05/03/2016 14:48:17: Creating virgin network. MPI Rank 0: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 0: MPI Rank 0: Post-processing network... @@ -514,14 +509,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:27: Created model with 25 nodes on GPU 0. +MPI Rank 0: 05/03/2016 14:48:18: Created model with 25 nodes on GPU 0. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:27: Training criterion node(s): -MPI Rank 0: 05/03/2016 15:53:27: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 14:48:18: Training criterion node(s): +MPI Rank 0: 05/03/2016 14:48:18: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:27: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 14:48:18: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:27: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 14:48:18: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -529,193 +524,199 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 000000D6211A1740: {[W0 Value[512 x 363]] } -MPI Rank 0: 000000D6211A1B00: {[B0 Value[512 x 1]] } -MPI Rank 0: 000000D6211A1D80: {[W1 Value[512 x 512]] } -MPI Rank 0: 000000D6211A20A0: {[B1 Value[512 x 1]] } -MPI Rank 0: 000000D6211A2460: {[W2 Value[132 x 512]] } -MPI Rank 0: 000000D6211A2A00: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 000000D6211A2AA0: {[B2 Value[132 x 1]] } -MPI Rank 0: 000000D6211A2D20: {[labels Value[132 x *]] } -MPI Rank 0: 000000D6211A34A0: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 000000D6215CE4C0: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 000000D6215CE6A0: {[W0*features Value[512 x *]] } -MPI Rank 0: 000000D6215CE880: {[LogOfPrior Value[132]] } -MPI Rank 0: 000000D6215CE920: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 000000D6215CEA60: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 000000D6215CED80: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 000000D6215CEEC0: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 000000D6215CEF60: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 000000D6215CF1E0: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 000000D6215CF320: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 000000D6215CF6E0: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 000000D6215CF780: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 000000D6215CF8C0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 000000D6215CFBE0: {[Prior Value[132]] } -MPI Rank 0: 000000D6215CFFA0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 000000D6215D0040: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 000000D6215D0180: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 000000D6215D0220: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 000000D67F60CB30: {[features Value[363 x *]] } +MPI Rank 0: 000000D4BE3F05F0: {[features Value[363 x *]] } +MPI Rank 0: 000000D4DCEF7FA0: {[W2 Value[132 x 512]] } +MPI Rank 0: 000000D4DCEF8720: {[B0 Value[512 x 1]] } +MPI Rank 0: 000000D4DCEF8EA0: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 000000D4DCEF9440: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 000000D4DCEF96C0: {[W1 Value[512 x 512]] 
} +MPI Rank 0: 000000D4DCEF98A0: {[B1 Value[512 x 1]] } +MPI Rank 0: 000000D4DCEF9BC0: {[W0 Value[512 x 363]] } +MPI Rank 0: 000000D4DF4AE3F0: {[labels Value[132 x *]] } +MPI Rank 0: 000000D4DF4AE5D0: {[B2 Value[132 x 1]] } +MPI Rank 0: 000000D4DF4AE670: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 000000D4DF4AE7B0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 000000D4DF4AE850: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 000000D4DF4AE8F0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 000000D4DF4AECB0: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 000000D4DF4AEDF0: {[LogOfPrior Value[132]] } +MPI Rank 0: 000000D4DF4AEE90: {[B2 Gradient[132 x 1]] } +MPI Rank 0: 000000D4DF4AEFD0: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 000000D4DF4AF110: {[W0*features Value[512 x *]] } +MPI Rank 0: 000000D4DF4AF1B0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 000000D4DF4AF2F0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 000000D4DF4AF390: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 000000D4DF4AF430: {[Prior Value[132]] } +MPI Rank 0: 000000D4DF4AFB10: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 000000D4DF4AFED0: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 000000D4DF4AFF70: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 000000D4DF4B01F0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 000000D4DF4B0290: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:27: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 14:48:18: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:27: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 15:53:27: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 15:53:27: Prior = Mean() +MPI Rank 0: 05/03/2016 14:48:18: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 14:48:18: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 14:48:18: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:31: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 14:48:23: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 14:48:24: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:32: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 15:53:33: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.5136s; samplesPerSecond = 1246.2 -MPI Rank 0: 05/03/2016 15:53:33: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3135s; samplesPerSecond = 2041.2 -MPI Rank 0: 05/03/2016 15:53:33: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3141s; samplesPerSecond = 2037.6 -MPI Rank 0: 05/03/2016 15:53:34: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3138s; samplesPerSecond = 2039.4 -MPI Rank 0: 05/03/2016 15:53:34: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3140s; samplesPerSecond = 2038.2 -MPI Rank 0: 05/03/2016 15:53:34: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3132s; samplesPerSecond = 2043.6 -MPI Rank 0: 05/03/2016 15:53:35: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3242s; samplesPerSecond = 1974.2 -MPI Rank 0: 05/03/2016 15:53:35: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3138s; samplesPerSecond = 2039.2 -MPI Rank 0: 05/03/2016 15:53:35: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3125s; samplesPerSecond = 2048.2 -MPI Rank 0: 05/03/2016 15:53:36: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3120s; samplesPerSecond = 2051.1 -MPI Rank 0: 05/03/2016 15:53:36: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3144s; samplesPerSecond = 2035.4 -MPI Rank 0: 05/03/2016 15:53:36: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3132s; samplesPerSecond = 2043.5 -MPI Rank 0: 05/03/2016 15:53:36: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3016s; samplesPerSecond = 2121.7 -MPI Rank 0: 05/03/2016 15:53:37: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3349s; samplesPerSecond = 1911.2 -MPI Rank 0: 05/03/2016 15:53:37: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3132s; samplesPerSecond = 2043.4 -MPI Rank 0: 05/03/2016 15:53:37: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3124s; samplesPerSecond = 2048.5 -MPI Rank 0: 05/03/2016 15:53:38: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3032s; samplesPerSecond = 2110.7 -MPI Rank 0: 05/03/2016 15:53:38: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3034s; samplesPerSecond = 2109.3 -MPI Rank 0: 05/03/2016 15:53:38: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3135s; samplesPerSecond = 2041.7 -MPI Rank 0: 05/03/2016 15:53:39: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3128s; samplesPerSecond = 2046.0 -MPI Rank 0: 05/03/2016 15:53:39: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3125s; samplesPerSecond = 2048.2 -MPI Rank 0: 05/03/2016 15:53:39: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3128s; samplesPerSecond = 2046.2 -MPI Rank 0: 05/03/2016 15:53:40: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3147s; samplesPerSecond = 2033.8 -MPI Rank 0: 05/03/2016 15:53:40: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3035s; samplesPerSecond = 2109.1 -MPI Rank 0: 05/03/2016 15:53:40: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3345s; samplesPerSecond = 1913.2 -MPI Rank 0: 05/03/2016 15:53:41: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3141s; samplesPerSecond = 2037.7 -MPI Rank 0: 05/03/2016 15:53:41: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3137s; samplesPerSecond = 2040.1 -MPI Rank 0: 05/03/2016 15:53:41: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3034s; samplesPerSecond = 2109.4 -MPI Rank 0: 05/03/2016 15:53:42: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3130s; samplesPerSecond = 2044.9 -MPI Rank 0: 05/03/2016 15:53:42: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3135s; samplesPerSecond = 2041.8 -MPI Rank 0: 05/03/2016 15:53:42: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3132s; samplesPerSecond = 2043.5 -MPI Rank 0: 05/03/2016 15:53:42: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.2205s; samplesPerSecond = 2902.4 -MPI Rank 0: 05/03/2016 15:53:42: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.2479s -MPI Rank 0: 05/03/2016 15:53:42: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 14:48:24: Starting minibatch loop. 
+MPI Rank 0: 05/03/2016 14:48:24: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3085s; samplesPerSecond = 2074.4 +MPI Rank 0: 05/03/2016 14:48:24: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3126s; samplesPerSecond = 2047.2 +MPI Rank 0: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3139s; samplesPerSecond = 2038.9 +MPI Rank 0: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3160s; samplesPerSecond = 2025.4 +MPI Rank 0: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3088s; samplesPerSecond = 2072.6 +MPI Rank 0: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3132s; samplesPerSecond = 2043.5 +MPI Rank 0: 05/03/2016 14:48:26: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3044s; samplesPerSecond = 2102.3 +MPI Rank 0: 05/03/2016 14:48:26: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3124s; samplesPerSecond = 2048.8 +MPI Rank 0: 05/03/2016 14:48:26: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3139s; samplesPerSecond = 2038.6 +MPI Rank 0: 05/03/2016 14:48:27: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3140s; samplesPerSecond = 2037.9 +MPI Rank 0: 05/03/2016 14:48:27: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3122s; samplesPerSecond = 2050.0 +MPI Rank 0: 05/03/2016 14:48:27: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3243s; samplesPerSecond = 1973.3 +MPI Rank 0: 05/03/2016 14:48:28: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3147s; samplesPerSecond = 2033.9 +MPI Rank 0: 05/03/2016 14:48:28: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3350s; samplesPerSecond = 1910.4 +MPI Rank 0: 05/03/2016 14:48:28: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3131s; samplesPerSecond = 2044.1 +MPI Rank 0: 05/03/2016 14:48:29: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3141s; samplesPerSecond = 2037.3 +MPI Rank 0: 05/03/2016 14:48:29: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3039s; samplesPerSecond = 2105.7 +MPI Rank 0: 05/03/2016 14:48:29: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3131s; samplesPerSecond = 2043.9 +MPI Rank 0: 05/03/2016 14:48:30: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3137s; samplesPerSecond = 2040.4 +MPI Rank 0: 05/03/2016 14:48:30: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3141s; samplesPerSecond = 2037.6 +MPI Rank 0: 05/03/2016 14:48:30: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3150s; samplesPerSecond = 2031.6 +MPI Rank 0: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3151s; samplesPerSecond = 2031.1 +MPI Rank 0: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3146s; samplesPerSecond = 2034.2 +MPI Rank 0: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3147s; samplesPerSecond = 2033.4 +MPI Rank 0: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3136s; samplesPerSecond = 2041.1 +MPI Rank 0: 05/03/2016 14:48:32: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3147s; samplesPerSecond = 2033.4 +MPI Rank 0: 05/03/2016 14:48:32: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3146s; samplesPerSecond = 2034.2 +MPI Rank 0: 05/03/2016 14:48:32: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3148s; samplesPerSecond = 2032.9 +MPI Rank 0: 05/03/2016 14:48:33: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3148s; samplesPerSecond = 2033.2 +MPI Rank 0: 05/03/2016 14:48:33: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3146s; samplesPerSecond = 2034.2 +MPI Rank 0: 05/03/2016 14:48:33: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3135s; samplesPerSecond = 2041.3 +MPI Rank 0: 05/03/2016 14:48:34: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3134s; samplesPerSecond = 2042.2 +MPI Rank 0: 05/03/2016 14:48:34: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.0625s +MPI Rank 0: 05/03/2016 14:48:34: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:43: Starting Epoch 2: learning rate per sample = 0.001953 
effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 14:48:34: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:43: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.035699 -MPI Rank 0: Async gradient aggregation wait time: 0.001381 -MPI Rank 0: Actual gradient aggregation time: 0.051258 -MPI Rank 0: 05/03/2016 15:53:43: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14557030 * 2304; EvalErrorPrediction = 0.57812500 * 2304; time = 0.5254s; samplesPerSecond = 4385.3 -MPI Rank 0: Async gradient aggregation wait time: 0.010622 -MPI Rank 0: Actual gradient aggregation time: 0.050145 -MPI Rank 0: Async gradient aggregation wait time: 0.020558 -MPI Rank 0: Actual gradient aggregation time: 0.04895 -MPI Rank 0: 05/03/2016 15:53:44: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08093589 * 2560; EvalErrorPrediction = 0.57773438 * 2560; time = 0.5019s; samplesPerSecond = 5100.8 -MPI Rank 0: Async gradient aggregation wait time: 0.004315 -MPI Rank 0: Actual gradient aggregation time: 0.049913 -MPI Rank 0: Async gradient aggregation wait time: 0.016495 -MPI Rank 0: Actual gradient aggregation time: 0.049233 -MPI Rank 0: 05/03/2016 15:53:44: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06764732 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5032s; samplesPerSecond = 5087.0 -MPI Rank 0: Async gradient aggregation wait time: 0.00414 -MPI Rank 0: Actual gradient aggregation time: 0.051328 -MPI Rank 0: Async gradient aggregation wait time: 0.004317 -MPI Rank 0: Actual gradient aggregation time: 0.049165 -MPI Rank 0: 05/03/2016 15:53:45: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.02760774 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.5073s; samplesPerSecond = 5046.2 -MPI Rank 0: Async gradient aggregation wait time: 0.008773 -MPI Rank 0: Actual gradient aggregation time: 0.049244 -MPI Rank 0: Async gradient aggregation wait time: 0.004437 -MPI Rank 0: Actual gradient aggregation time: 0.049396 -MPI Rank 0: 05/03/2016 15:53:45: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97300714 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 0.5018s; samplesPerSecond = 5101.3 -MPI Rank 0: Async gradient aggregation wait time: 0.019472 -MPI Rank 0: Actual gradient aggregation time: 0.045301 -MPI Rank 0: Async gradient aggregation wait time: 0.004386 -MPI Rank 0: Actual gradient aggregation time: 0.049159 -MPI Rank 0: 05/03/2016 15:53:46: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07035181 * 2560; EvalErrorPrediction = 0.56835938 * 2560; time = 0.5050s; samplesPerSecond = 5069.6 -MPI Rank 0: Async gradient aggregation wait time: 0.004544 -MPI Rank 0: Actual gradient aggregation time: 0.049298 -MPI Rank 0: Async gradient aggregation wait time: 0.004194 -MPI Rank 0: Actual gradient aggregation time: 0.049972 -MPI Rank 0: 05/03/2016 15:53:46: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02989276 * 2560; EvalErrorPrediction = 0.56132812 * 2560; time = 
0.5061s; samplesPerSecond = 5058.7 -MPI Rank 0: Async gradient aggregation wait time: 0.016848 -MPI Rank 0: Actual gradient aggregation time: 0.050179 -MPI Rank 0: Async gradient aggregation wait time: 0.004351 -MPI Rank 0: Actual gradient aggregation time: 0.050603 -MPI Rank 0: 05/03/2016 15:53:47: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18157839 * 2560; EvalErrorPrediction = 0.61757812 * 2560; time = 0.5058s; samplesPerSecond = 5061.0 -MPI Rank 0: Async gradient aggregation wait time: 0.011414 -MPI Rank 0: Actual gradient aggregation time: 0.012595 -MPI Rank 0: 05/03/2016 15:53:47: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07254235 * 20480; EvalErrorPrediction = 0.57407227 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.08905s -MPI Rank 0: 05/03/2016 15:53:47: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.2' +MPI Rank 0: 05/03/2016 14:48:34: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Actual gradient aggregation time: 0.039904 +MPI Rank 0: Async gradient aggregation wait time: 0 +MPI Rank 0: Actual gradient aggregation time: 0.05001 +MPI Rank 0: 05/03/2016 14:48:34: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14557030 * 2304; EvalErrorPrediction = 0.57812500 * 2304; time = 0.5216s; samplesPerSecond = 4417.3 +MPI Rank 0: Async gradient aggregation wait time: 0.010026 +MPI Rank 0: Actual gradient aggregation time: 0.052835 +MPI Rank 0: Async gradient aggregation wait time: 0.011632 +MPI Rank 0: Actual gradient aggregation time: 0.048292 +MPI Rank 0: 05/03/2016 14:48:35: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08093589 * 2560; EvalErrorPrediction = 0.57773438 * 2560; time = 0.5052s; samplesPerSecond = 5067.4 +MPI Rank 0: Async gradient aggregation wait time: 0.008701 +MPI Rank 0: Actual gradient aggregation time: 0.05021 +MPI Rank 0: Async gradient aggregation wait time: 0.004254 +MPI Rank 0: Actual gradient aggregation time: 0.049927 +MPI Rank 0: 05/03/2016 14:48:35: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06764732 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5096s; samplesPerSecond = 5024.0 +MPI Rank 0: Async gradient aggregation wait time: 0.004065 +MPI Rank 0: Actual gradient aggregation time: 0.051408 +MPI Rank 0: Async gradient aggregation wait time: 0.004093 +MPI Rank 0: Actual gradient aggregation time: 0.049267 +MPI Rank 0: 05/03/2016 14:48:36: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.02760774 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.5104s; samplesPerSecond = 5016.0 +MPI Rank 0: Async gradient aggregation wait time: 0.004427 +MPI Rank 0: Actual gradient aggregation time: 0.05018 +MPI Rank 0: Async gradient aggregation wait time: 0.026697 +MPI Rank 0: Actual gradient aggregation time: 0.050019 +MPI Rank 0: 05/03/2016 14:48:36: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97300714 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 0.5075s; samplesPerSecond = 5044.8 +MPI Rank 0: Async gradient aggregation wait time: 0.003847 +MPI Rank 0: Actual gradient aggregation time: 0.044269 +MPI Rank 0: Async gradient aggregation 
wait time: 0.004206 +MPI Rank 0: Actual gradient aggregation time: 0.050662 +MPI Rank 0: 05/03/2016 14:48:37: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07035181 * 2560; EvalErrorPrediction = 0.56835938 * 2560; time = 0.5120s; samplesPerSecond = 4999.8 +MPI Rank 0: Async gradient aggregation wait time: 0.009262 +MPI Rank 0: Actual gradient aggregation time: 0.0499 +MPI Rank 0: Async gradient aggregation wait time: 0.004143 +MPI Rank 0: Actual gradient aggregation time: 0.050594 +MPI Rank 0: 05/03/2016 14:48:37: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02989276 * 2560; EvalErrorPrediction = 0.56132812 * 2560; time = 0.5084s; samplesPerSecond = 5035.0 +MPI Rank 0: Async gradient aggregation wait time: 0.010786 +MPI Rank 0: Actual gradient aggregation time: 0.051051 +MPI Rank 0: Async gradient aggregation wait time: 0.004038 +MPI Rank 0: Actual gradient aggregation time: 0.050994 +MPI Rank 0: 05/03/2016 14:48:38: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18157839 * 2560; EvalErrorPrediction = 0.61757812 * 2560; time = 0.5117s; samplesPerSecond = 5002.7 +MPI Rank 0: Async gradient aggregation wait time: 0.011404 +MPI Rank 0: Actual gradient aggregation time: 0.012907 +MPI Rank 0: 05/03/2016 14:48:38: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07254235 * 20480; EvalErrorPrediction = 0.57407227 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.11606s +MPI Rank 0: 05/03/2016 14:48:38: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:47: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:48:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:47: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 0: Async gradient aggregation wait time: 0.00155 -MPI Rank 0: Actual gradient aggregation time: 0.150204 -MPI Rank 0: Async gradient aggregation wait time: 0.020273 -MPI Rank 0: Actual gradient aggregation time: 0.174538 -MPI Rank 0: 05/03/2016 15:53:48: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.10460850 * 9216; EvalErrorPrediction = 0.56998698 * 9216; time = 1.5344s; samplesPerSecond = 6006.3 -MPI Rank 0: Async gradient aggregation wait time: 0.057131 -MPI Rank 0: Actual gradient aggregation time: 0.149227 -MPI Rank 0: Async gradient aggregation wait time: 0.114636 -MPI Rank 0: Actual gradient aggregation time: 0.150372 -MPI Rank 0: 05/03/2016 15:53:50: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.06084998 * 10240; EvalErrorPrediction = 0.55947266 * 10240; time = 1.5255s; samplesPerSecond = 6712.6 -MPI Rank 0: 05/03/2016 15:53:50: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.07685066 * 20480; EvalErrorPrediction = 0.56367188 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.09281s -MPI Rank 0: 05/03/2016 15:53:50: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.3' +MPI Rank 0: 05/03/2016 14:48:38: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Async gradient aggregation wait time: 0.000764 +MPI Rank 0: Actual gradient aggregation time: 0.15265 +MPI Rank 0: Async gradient aggregation wait time: 0.022414 +MPI Rank 0: Actual gradient aggregation time: 0.173319 +MPI Rank 0: 05/03/2016 14:48:39: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.10460850 * 9216; EvalErrorPrediction = 0.56998698 * 9216; time = 1.5426s; samplesPerSecond = 5974.5 +MPI Rank 0: Async gradient aggregation wait time: 0.051791 +MPI Rank 0: Actual gradient aggregation time: 0.150759 +MPI Rank 0: Async gradient aggregation wait time: 0.04028 +MPI Rank 0: Actual gradient aggregation time: 0.150641 +MPI Rank 0: 05/03/2016 14:48:41: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.06084998 * 10240; EvalErrorPrediction = 0.55947266 * 10240; time = 1.5316s; samplesPerSecond = 6686.0 +MPI Rank 0: 05/03/2016 14:48:41: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.07685066 * 20480; EvalErrorPrediction = 0.56367188 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.10452s +MPI Rank 0: 05/03/2016 14:48:41: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.3' MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:50: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:48:41: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:50: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), 
BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 0.004946 -MPI Rank 0: Actual gradient aggregation time: 0.151584 -MPI Rank 0: Async gradient aggregation wait time: 0.004017 -MPI Rank 0: Actual gradient aggregation time: 0.119394 -MPI Rank 0: 05/03/2016 15:53:52: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.94976539 * 9216; EvalErrorPrediction = 0.53884549 * 9216; time = 1.5002s; samplesPerSecond = 6143.2 -MPI Rank 0: Async gradient aggregation wait time: 0.012665 -MPI Rank 0: Actual gradient aggregation time: 0.149079 -MPI Rank 0: Async gradient aggregation wait time: 0.017201 -MPI Rank 0: Actual gradient aggregation time: 0.153037 -MPI Rank 0: 05/03/2016 15:53:53: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91243517 * 10240; EvalErrorPrediction = 0.52392578 * 10240; time = 1.4990s; samplesPerSecond = 6831.1 -MPI Rank 0: Async gradient aggregation wait time: 0.012493 -MPI Rank 0: 05/03/2016 15:53:53: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.92947846 * 20480; EvalErrorPrediction = 0.53051758 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.05679s -MPI Rank 0: 05/03/2016 15:53:53: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 15:53:53: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 14:48:41: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Async gradient aggregation wait time: 0.095828 +MPI Rank 0: Actual gradient aggregation time: 0.151416 +MPI Rank 0: Async gradient aggregation wait time: 0.066538 +MPI Rank 0: Actual gradient aggregation time: 0.118739 +MPI Rank 0: 05/03/2016 14:48:43: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.94976539 * 9216; EvalErrorPrediction = 0.53884549 * 9216; time = 1.5083s; samplesPerSecond = 6110.1 +MPI Rank 0: Async gradient aggregation wait time: 0.004532 +MPI Rank 0: Actual gradient aggregation time: 0.150409 +MPI Rank 0: Async gradient aggregation wait time: 0.010838 +MPI Rank 0: Actual gradient aggregation time: 0.153741 +MPI Rank 0: 05/03/2016 14:48:44: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91243517 * 10240; EvalErrorPrediction = 0.52392578 * 10240; time = 1.5024s; samplesPerSecond = 6815.9 +MPI Rank 0: Async gradient aggregation wait time: 0.012018 +MPI Rank 0: 05/03/2016 14:48:44: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.92947846 * 20480; EvalErrorPrediction = 0.53051758 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.06629s +MPI Rank 0: 05/03/2016 14:48:44: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 14:48:44: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:53: Action "train" complete. +MPI Rank 0: 05/03/2016 14:48:44: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 15:53:53: __COMPLETED__ -MPI Rank 1: 05/03/2016 15:53:26: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank1 -MPI Rank 1: 05/03/2016 15:53:26: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 15:53:26: Build info: +MPI Rank 0: 05/03/2016 14:48:44: __COMPLETED__ +MPI Rank 1: 05/03/2016 14:48:17: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank1 +MPI Rank 1: 05/03/2016 14:48:17: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:48:17: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:26: Built time: May 3 2016 13:15:46 -MPI Rank 1: 05/03/2016 15:53:26: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 1: 05/03/2016 15:53:26: Build type: Release -MPI Rank 1: 05/03/2016 15:53:26: Build target: GPU -MPI Rank 1: 05/03/2016 15:53:26: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 15:53:26: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 05/03/2016 15:53:26: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 1: 05/03/2016 15:53:26: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 05/03/2016 15:53:26: Build Branch: HEAD -MPI Rank 1: 05/03/2016 15:53:26: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 1: 05/03/2016 15:53:26: Built by svcphil on cntk-muc01 -MPI Rank 1: 05/03/2016 15:53:26: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 05/03/2016 15:53:26: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:48:17: Built time: May 3 2016 13:15:46 +MPI Rank 1: 05/03/2016 14:48:17: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 1: 05/03/2016 14:48:17: Build type: Release +MPI Rank 1: 05/03/2016 14:48:17: Build target: GPU +MPI Rank 1: 05/03/2016 14:48:17: With 1bit-SGD: no +MPI Rank 1: 05/03/2016 14:48:17: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 1: 05/03/2016 14:48:17: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 1: 05/03/2016 14:48:17: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 1: 05/03/2016 14:48:17: Build Branch: HEAD +MPI Rank 1: 05/03/2016 14:48:17: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 1: 05/03/2016 14:48:17: Built by svcphil on cntk-muc01 +MPI Rank 1: 05/03/2016 14:48:17: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 1: 05/03/2016 14:48:17: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:26: Running on cntk-muc01 at 2016/05/03 15:53:26 -MPI Rank 1: 05/03/2016 15:53:26: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: 05/03/2016 14:48:17: Running on cntk-muc01 at 2016/05/03 14:48:17 +MPI Rank 1: 05/03/2016 14:48:17: Command line: +MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:26: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 15:53:26: precision = "float" +MPI Rank 1: 05/03/2016 14:48:17: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:48:17: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -805,14 +806,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -820,18 +819,18 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:26: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:48:17: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:26: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 15:53:26: precision = "float" +MPI Rank 1: 05/03/2016 14:48:17: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:48:17: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = 0 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -915,14 +914,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI 
Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -930,24 +927,24 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:26: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:48:18: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:26: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:48:18: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:deviceId=0 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -1029,35 +1026,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] 
[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 15:53:26: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 15:53:26: Commands: speechTrain -MPI Rank 1: 05/03/2016 15:53:26: Precision = "double" -MPI Rank 1: 05/03/2016 15:53:26: Using 2 CPU threads. -MPI Rank 1: 05/03/2016 15:53:26: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 15:53:26: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 1: 05/03/2016 15:53:26: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 1: 05/03/2016 14:48:18: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:48:18: Commands: speechTrain +MPI Rank 1: 05/03/2016 14:48:18: Precision = "double" +MPI Rank 1: 05/03/2016 14:48:18: Using 2 CPU threads. +MPI Rank 1: 05/03/2016 14:48:18: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 14:48:18: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 1: 05/03/2016 14:48:18: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:26: ############################################################################## -MPI Rank 1: 05/03/2016 15:53:26: # # -MPI Rank 1: 05/03/2016 15:53:26: # Action "train" # -MPI Rank 1: 05/03/2016 15:53:26: # # -MPI Rank 1: 05/03/2016 15:53:26: ############################################################################## +MPI Rank 1: 05/03/2016 14:48:18: ############################################################################## +MPI Rank 1: 05/03/2016 14:48:18: # # +MPI Rank 1: 05/03/2016 14:48:18: # Action "train" # +MPI Rank 1: 05/03/2016 14:48:18: # # +MPI Rank 1: 05/03/2016 14:48:18: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:26: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 14:48:18: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 
948 entries MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:27: Creating virgin network. +MPI Rank 1: 05/03/2016 14:48:18: Creating virgin network. MPI Rank 1: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 1: MPI Rank 1: Post-processing network... @@ -1110,14 +1106,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:27: Created model with 25 nodes on GPU 0. +MPI Rank 1: 05/03/2016 14:48:19: Created model with 25 nodes on GPU 0. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:27: Training criterion node(s): -MPI Rank 1: 05/03/2016 15:53:27: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 14:48:19: Training criterion node(s): +MPI Rank 1: 05/03/2016 14:48:19: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:27: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 14:48:19: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:27: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 14:48:19: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. 
@@ -1125,189 +1121,195 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 0000006ADD3C4290: {[features Value[363 x *]] } -MPI Rank 1: 0000006AFEAB5BD0: {[W2 Value[132 x 512]] } -MPI Rank 1: 0000006AFEAB5D10: {[B0 Value[512 x 1]] } -MPI Rank 1: 0000006AFEAB5DB0: {[W1 Value[512 x 512]] } -MPI Rank 1: 0000006AFEAB6030: {[labels Value[132 x *]] } -MPI Rank 1: 0000006AFEAB6490: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 0000006AFEAB6850: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 0000006AFEAB6B70: {[W0 Value[512 x 363]] } -MPI Rank 1: 0000006AFEAB6E90: {[B2 Value[132 x 1]] } -MPI Rank 1: 0000006AFEAB72F0: {[B1 Value[512 x 1]] } -MPI Rank 1: 0000006AFEEE26D0: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 0000006AFEEE28B0: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 0000006AFEEE2950: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 0000006AFEEE2A90: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 0000006AFEEE2DB0: {[W0*features Value[512 x *]] } -MPI Rank 1: 0000006AFEEE3030: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 0000006AFEEE30D0: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 0000006AFEEE3170: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 0000006AFEEE32B0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 0000006AFEEE3350: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 0000006AFEEE33F0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 0000006AFEEE3710: {[Prior Value[132]] } -MPI Rank 1: 0000006AFEEE37B0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 0000006AFEEE3990: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 0000006AFEEE3AD0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 0000006AFEEE3DF0: {[LogOfPrior Value[132]] } -MPI Rank 1: 0000006AFEEE3E90: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 0000006AFEEE41B0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 00000025226025A0: {[B1 Value[512 x 1]] } +MPI Rank 1: 0000002522603040: {[W1 Value[512 x 512]] } +MPI Rank 1: 0000002522603400: {[W2 Value[132 x 512]] } +MPI Rank 1: 0000002522603720: {[W0 Value[512 x 363]] } +MPI Rank 1: 00000025226039A0: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 0000002522603A40: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 0000002522604300: {[B0 Value[512 x 1]] } +MPI Rank 1: 00000025252D8D70: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 00000025252D9090: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 00000025252D91D0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 00000025252D9310: {[B2 Value[132 x 1]] } +MPI Rank 1: 00000025252D93B0: {[labels Value[132 x *]] } +MPI Rank 1: 00000025252D96D0: {[W0*features Value[512 x *]] } +MPI Rank 1: 00000025252D9770: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 00000025252D9810: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI 
Rank 1: 00000025252D9B30: {[LogOfPrior Value[132]] } +MPI Rank 1: 00000025252D9C70: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 00000025252D9E50: {[Prior Value[132]] } +MPI Rank 1: 00000025252DA030: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 00000025252DA0D0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 00000025252DA2B0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 00000025252DA350: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 00000025252DA3F0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 00000025252DA490: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 00000025252DA670: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 00000025252DA990: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 00000025252DAAD0: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 000000257DC900F0: {[features Value[363 x *]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:27: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 14:48:19: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:27: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 15:53:27: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 15:53:27: Prior = Mean() +MPI Rank 1: 05/03/2016 14:48:19: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 14:48:19: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 14:48:19: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:32: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 14:48:23: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 14:48:24: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:32: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 15:53:33: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.4507s; samplesPerSecond = 1420.2 -MPI Rank 1: 05/03/2016 15:53:33: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3135s; samplesPerSecond = 2041.7 -MPI Rank 1: 05/03/2016 15:53:33: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3139s; samplesPerSecond = 2039.0 -MPI Rank 1: 05/03/2016 15:53:34: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3146s; samplesPerSecond = 2034.1 -MPI Rank 1: 05/03/2016 15:53:34: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3132s; samplesPerSecond = 2043.3 -MPI Rank 1: 05/03/2016 15:53:34: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3134s; samplesPerSecond = 2042.0 -MPI Rank 1: 05/03/2016 15:53:35: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.2922s; samplesPerSecond = 2190.6 -MPI Rank 1: 05/03/2016 15:53:35: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3139s; samplesPerSecond = 2038.7 -MPI Rank 1: 05/03/2016 15:53:35: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3132s; samplesPerSecond = 2043.7 -MPI Rank 1: 05/03/2016 15:53:35: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3121s; samplesPerSecond = 2050.4 -MPI Rank 1: 05/03/2016 15:53:36: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3140s; samplesPerSecond = 2038.3 -MPI Rank 1: 05/03/2016 15:53:36: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3136s; samplesPerSecond = 2040.5 -MPI Rank 1: 05/03/2016 15:53:36: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3120s; samplesPerSecond = 2051.0 -MPI Rank 1: 05/03/2016 15:53:37: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3239s; samplesPerSecond = 1975.8 -MPI Rank 1: 05/03/2016 15:53:37: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3129s; samplesPerSecond = 2045.1 -MPI Rank 1: 05/03/2016 15:53:37: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.2926s; samplesPerSecond = 2187.6 -MPI Rank 1: 05/03/2016 15:53:38: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3234s; samplesPerSecond = 1979.2 -MPI Rank 1: 05/03/2016 15:53:38: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3027s; samplesPerSecond = 2114.1 -MPI Rank 1: 05/03/2016 15:53:38: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3138s; samplesPerSecond = 2039.8 -MPI Rank 1: 05/03/2016 15:53:39: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3138s; samplesPerSecond = 2039.3 -MPI Rank 1: 05/03/2016 15:53:39: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3127s; samplesPerSecond = 2046.6 -MPI Rank 1: 05/03/2016 15:53:39: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3127s; samplesPerSecond = 2046.6 -MPI Rank 1: 05/03/2016 15:53:40: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3137s; samplesPerSecond = 2040.0 -MPI Rank 1: 05/03/2016 15:53:40: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3140s; samplesPerSecond = 2038.5 -MPI Rank 1: 05/03/2016 15:53:40: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3244s; samplesPerSecond = 1972.8 -MPI Rank 1: 05/03/2016 15:53:40: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3138s; samplesPerSecond = 2039.2 -MPI Rank 1: 05/03/2016 15:53:41: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3143s; samplesPerSecond = 2036.0 -MPI Rank 1: 05/03/2016 15:53:41: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3023s; samplesPerSecond = 2116.8 -MPI Rank 1: 05/03/2016 15:53:41: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3138s; samplesPerSecond = 2039.5 -MPI Rank 1: 05/03/2016 15:53:42: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3137s; samplesPerSecond = 2040.3 -MPI Rank 1: 05/03/2016 15:53:42: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3132s; samplesPerSecond = 2043.2 -MPI Rank 1: 05/03/2016 15:53:42: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.2821s; samplesPerSecond = 2268.4 -MPI Rank 1: 05/03/2016 15:53:42: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.2159s +MPI Rank 1: 05/03/2016 14:48:24: Starting minibatch loop. 
+MPI Rank 1: 05/03/2016 14:48:24: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3081s; samplesPerSecond = 2077.5 +MPI Rank 1: 05/03/2016 14:48:24: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3167s; samplesPerSecond = 2021.1 +MPI Rank 1: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3098s; samplesPerSecond = 2065.6 +MPI Rank 1: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3124s; samplesPerSecond = 2048.9 +MPI Rank 1: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3129s; samplesPerSecond = 2045.3 +MPI Rank 1: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3168s; samplesPerSecond = 2020.3 +MPI Rank 1: 05/03/2016 14:48:26: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3216s; samplesPerSecond = 1990.3 +MPI Rank 1: 05/03/2016 14:48:26: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3121s; samplesPerSecond = 2050.7 +MPI Rank 1: 05/03/2016 14:48:26: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3139s; samplesPerSecond = 2038.9 +MPI Rank 1: 05/03/2016 14:48:27: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3143s; samplesPerSecond = 2036.2 +MPI Rank 1: 05/03/2016 14:48:27: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3120s; samplesPerSecond = 2051.4 +MPI Rank 1: 05/03/2016 14:48:27: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3185s; samplesPerSecond = 2009.5 +MPI Rank 1: 05/03/2016 14:48:28: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3315s; samplesPerSecond = 1930.5 +MPI Rank 1: 05/03/2016 14:48:28: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.2716s; samplesPerSecond = 2356.6 +MPI Rank 1: 05/03/2016 14:48:28: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3063s; samplesPerSecond = 2089.7 +MPI Rank 1: 05/03/2016 14:48:29: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3110s; samplesPerSecond = 2058.1 +MPI Rank 1: 05/03/2016 14:48:29: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3283s; samplesPerSecond = 1949.2 +MPI Rank 1: 05/03/2016 14:48:29: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3132s; samplesPerSecond = 2043.6 +MPI Rank 1: 05/03/2016 14:48:30: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3135s; samplesPerSecond = 2041.5 +MPI Rank 1: 05/03/2016 14:48:30: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3313s; samplesPerSecond = 1931.8 +MPI Rank 1: 05/03/2016 14:48:30: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3149s; samplesPerSecond = 2032.6 +MPI Rank 1: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3151s; samplesPerSecond = 2031.0 +MPI Rank 1: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3147s; samplesPerSecond = 2033.4 +MPI Rank 1: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3148s; samplesPerSecond = 2033.0 +MPI Rank 1: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3135s; samplesPerSecond = 2041.6 +MPI Rank 1: 05/03/2016 14:48:32: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3147s; samplesPerSecond = 2033.8 +MPI Rank 1: 05/03/2016 14:48:32: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3148s; samplesPerSecond = 2033.1 +MPI Rank 1: 05/03/2016 14:48:32: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3147s; samplesPerSecond = 2033.7 +MPI Rank 1: 05/03/2016 14:48:33: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3149s; samplesPerSecond = 2032.2 +MPI Rank 1: 05/03/2016 14:48:33: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3146s; samplesPerSecond = 2034.2 +MPI Rank 1: 05/03/2016 14:48:33: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3134s; samplesPerSecond = 2042.2 +MPI Rank 1: 05/03/2016 14:48:34: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3135s; samplesPerSecond = 2041.3 +MPI Rank 1: 05/03/2016 14:48:34: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.0625s MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:43: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 14:48:34: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:43: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.042933 -MPI Rank 1: Async gradient aggregation wait time: 0.028423 -MPI Rank 1: Actual gradient aggregation time: 0.049196 -MPI Rank 1: 05/03/2016 15:53:43: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14557030 * 2304; EvalErrorPrediction = 0.57812500 * 2304; time = 0.5262s; samplesPerSecond = 4378.7 -MPI Rank 1: Async gradient aggregation wait time: 0.010402 -MPI Rank 1: Actual gradient aggregation time: 0.0508 -MPI Rank 1: Async gradient aggregation wait time: 0.002657 -MPI Rank 1: Actual gradient aggregation time: 0.047372 -MPI Rank 1: 05/03/2016 15:53:44: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08093589 * 2560; EvalErrorPrediction = 0.57773438 * 2560; time = 0.5020s; samplesPerSecond = 5100.0 -MPI Rank 1: Async gradient aggregation wait time: 0.005188 -MPI Rank 1: Actual gradient aggregation time: 0.047739 -MPI Rank 1: Async gradient aggregation wait time: 0.004664 -MPI Rank 1: Actual gradient aggregation time: 0.047259 -MPI Rank 1: 05/03/2016 15:53:44: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06764732 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5035s; samplesPerSecond = 5084.1 -MPI Rank 1: Async gradient aggregation wait time: 0.012769 -MPI Rank 1: Actual gradient aggregation time: 0.049028 -MPI Rank 1: Async gradient aggregation wait time: 0.005157 -MPI Rank 1: Actual gradient aggregation time: 0.046805 -MPI Rank 1: 05/03/2016 15:53:45: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.02760774 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.5073s; samplesPerSecond = 5046.7 -MPI Rank 1: Async gradient aggregation wait time: 0.020087 -MPI Rank 1: Actual gradient aggregation time: 0.048341 -MPI Rank 1: Async gradient aggregation wait time: 0.005169 -MPI Rank 1: Actual gradient aggregation time: 0.047284 -MPI Rank 1: 05/03/2016 15:53:45: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97300714 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 0.5011s; samplesPerSecond = 5108.8 -MPI Rank 1: Async gradient aggregation wait time: 0.004041 -MPI Rank 1: Actual gradient aggregation time: 0.049986 -MPI Rank 1: Async gradient aggregation wait time: 0.005034 -MPI Rank 1: Actual gradient aggregation time: 0.047054 -MPI Rank 1: 05/03/2016 15:53:46: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07035181 * 2560; EvalErrorPrediction = 0.56835938 * 2560; time = 0.5049s; samplesPerSecond = 5070.1 -MPI Rank 1: Async gradient aggregation wait time: 0.013274 -MPI Rank 1: Actual gradient aggregation time: 0.048404 -MPI Rank 1: Async gradient aggregation wait time: 0.004989 -MPI Rank 1: Actual gradient aggregation time: 0.047958 -MPI Rank 1: 05/03/2016 15:53:46: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02989276 * 2560; EvalErrorPrediction = 0.56132812 * 2560; time = 0.5062s; samplesPerSecond = 5057.5 -MPI Rank 1: Async gradient aggregation wait time: 0.009254 -MPI Rank 1: Actual gradient aggregation time: 0.049486 -MPI Rank 1: Async gradient aggregation wait time: 0.005078 -MPI Rank 
1: Actual gradient aggregation time: 0.048461 -MPI Rank 1: 05/03/2016 15:53:47: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18157839 * 2560; EvalErrorPrediction = 0.61757812 * 2560; time = 0.5056s; samplesPerSecond = 5063.0 -MPI Rank 1: Async gradient aggregation wait time: 0.011402 -MPI Rank 1: Actual gradient aggregation time: 0.013081 -MPI Rank 1: 05/03/2016 15:53:47: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07254235 * 20480; EvalErrorPrediction = 0.57407227 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.08981s +MPI Rank 1: 05/03/2016 14:48:34: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Actual gradient aggregation time: 0.038515 +MPI Rank 1: Async gradient aggregation wait time: 0.02335 +MPI Rank 1: Actual gradient aggregation time: 0.050677 +MPI Rank 1: 05/03/2016 14:48:34: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14557030 * 2304; EvalErrorPrediction = 0.57812500 * 2304; time = 0.5223s; samplesPerSecond = 4411.1 +MPI Rank 1: Async gradient aggregation wait time: 0.012621 +MPI Rank 1: Actual gradient aggregation time: 0.04838 +MPI Rank 1: Async gradient aggregation wait time: 0.00441 +MPI Rank 1: Actual gradient aggregation time: 0.047081 +MPI Rank 1: 05/03/2016 14:48:35: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08093589 * 2560; EvalErrorPrediction = 0.57773438 * 2560; time = 0.5051s; samplesPerSecond = 5068.4 +MPI Rank 1: Async gradient aggregation wait time: 0.004202 +MPI Rank 1: Actual gradient aggregation time: 0.050194 +MPI Rank 1: Async gradient aggregation wait time: 0.005173 +MPI Rank 1: Actual gradient aggregation time: 0.046251 +MPI Rank 1: 05/03/2016 14:48:35: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06764732 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5094s; samplesPerSecond = 5025.7 +MPI Rank 1: Async gradient aggregation wait time: 0.004722 +MPI Rank 1: Actual gradient aggregation time: 0.049325 +MPI Rank 1: Async gradient aggregation wait time: 0.013248 +MPI Rank 1: Actual gradient aggregation time: 0.047017 +MPI Rank 1: 05/03/2016 14:48:36: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.02760774 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.5102s; samplesPerSecond = 5018.0 +MPI Rank 1: Async gradient aggregation wait time: 0.005074 +MPI Rank 1: Actual gradient aggregation time: 0.047779 +MPI Rank 1: Async gradient aggregation wait time: 0.003996 +MPI Rank 1: Actual gradient aggregation time: 0.050274 +MPI Rank 1: 05/03/2016 14:48:36: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97300714 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 0.5073s; samplesPerSecond = 5045.9 +MPI Rank 1: Async gradient aggregation wait time: 0.004653 +MPI Rank 1: Actual gradient aggregation time: 0.0409 +MPI Rank 1: Async gradient aggregation wait time: 0.018882 +MPI Rank 1: Actual gradient aggregation time: 0.048308 +MPI Rank 1: 05/03/2016 14:48:37: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07035181 * 2560; EvalErrorPrediction = 0.56835938 * 2560; time = 0.5116s; samplesPerSecond = 5003.8 +MPI Rank 1: Async gradient aggregation wait time: 0.009762 +MPI Rank 1: Actual gradient aggregation time: 0.050354 +MPI Rank 1: Async gradient aggregation wait time: 0.004904 +MPI 
Rank 1: Actual gradient aggregation time: 0.048445 +MPI Rank 1: 05/03/2016 14:48:37: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02989276 * 2560; EvalErrorPrediction = 0.56132812 * 2560; time = 0.5086s; samplesPerSecond = 5033.3 +MPI Rank 1: Async gradient aggregation wait time: 0.011161 +MPI Rank 1: Actual gradient aggregation time: 0.051559 +MPI Rank 1: Async gradient aggregation wait time: 0.016342 +MPI Rank 1: Actual gradient aggregation time: 0.048919 +MPI Rank 1: 05/03/2016 14:48:38: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18157839 * 2560; EvalErrorPrediction = 0.61757812 * 2560; time = 0.5114s; samplesPerSecond = 5005.5 +MPI Rank 1: Async gradient aggregation wait time: 0.011457 +MPI Rank 1: Actual gradient aggregation time: 0.010543 +MPI Rank 1: 05/03/2016 14:48:38: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07254235 * 20480; EvalErrorPrediction = 0.57407227 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.11674s MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:47: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 14:48:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:47: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.065491 -MPI Rank 1: Actual gradient aggregation time: 0.149753 -MPI Rank 1: Async gradient aggregation wait time: 0.022585 -MPI Rank 1: Actual gradient aggregation time: 0.172554 -MPI Rank 1: 05/03/2016 15:53:48: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.10460850 * 9216; EvalErrorPrediction = 0.56998698 * 9216; time = 1.5372s; samplesPerSecond = 5995.4 -MPI Rank 1: Async gradient aggregation wait time: 0.002477 -MPI Rank 1: Actual gradient aggregation time: 0.148823 -MPI Rank 1: Async gradient aggregation wait time: 0.045341 -MPI Rank 1: Actual gradient aggregation time: 0.149244 -MPI Rank 1: 05/03/2016 15:53:50: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.06084998 * 10240; EvalErrorPrediction = 0.55947266 * 10240; time = 1.5261s; samplesPerSecond = 6710.1 -MPI Rank 1: 05/03/2016 15:53:50: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.07685066 * 20480; EvalErrorPrediction = 0.56367188 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.093s +MPI Rank 1: 05/03/2016 14:48:38: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 1: Async gradient aggregation wait time: 0.068059 +MPI Rank 1: Actual gradient aggregation time: 0.153027 +MPI Rank 1: Async gradient aggregation wait time: 0.023405 +MPI Rank 1: Actual gradient aggregation time: 0.170704 +MPI Rank 1: 05/03/2016 14:48:39: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.10460850 * 9216; EvalErrorPrediction = 0.56998698 * 9216; time = 1.5440s; samplesPerSecond = 5969.0 +MPI Rank 1: Async gradient aggregation wait time: 0.003025 +MPI Rank 1: Actual gradient aggregation time: 0.148642 +MPI Rank 1: Async gradient aggregation wait time: 0.041119 +MPI Rank 1: Actual gradient aggregation time: 0.147981 +MPI Rank 1: 05/03/2016 14:48:41: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.06084998 * 10240; EvalErrorPrediction = 0.55947266 * 10240; time = 1.5312s; samplesPerSecond = 6687.5 +MPI Rank 1: 05/03/2016 14:48:41: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.07685066 * 20480; EvalErrorPrediction = 0.56367188 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.10524s MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:50: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 14:48:41: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:50: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.095643 -MPI Rank 1: Actual gradient aggregation time: 0.150633 -MPI Rank 1: Async gradient aggregation wait time: 0.004816 -MPI Rank 1: Actual gradient aggregation time: 0.12302 -MPI Rank 1: 05/03/2016 15:53:52: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.94976539 * 9216; EvalErrorPrediction = 0.53884549 * 9216; time = 1.5054s; samplesPerSecond = 6122.2 -MPI Rank 1: Async gradient aggregation wait time: 0.00505 -MPI Rank 1: Actual gradient aggregation time: 0.148591 -MPI Rank 1: Async gradient aggregation wait time: 0.004528 -MPI Rank 1: Actual gradient aggregation time: 0.150416 -MPI Rank 1: 05/03/2016 15:53:53: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91243517 * 10240; EvalErrorPrediction = 0.52392578 * 10240; time = 1.5217s; samplesPerSecond = 6729.3 -MPI Rank 1: Async gradient aggregation wait time: 0.012973 -MPI Rank 1: 05/03/2016 15:53:53: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.92947846 * 20480; EvalErrorPrediction = 0.53051758 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.05747s -MPI Rank 1: 05/03/2016 15:53:53: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 14:48:41: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 1: Async gradient aggregation wait time: 0.00358 +MPI Rank 1: Actual gradient aggregation time: 0.126463 +MPI Rank 1: Async gradient aggregation wait time: 0.016561 +MPI Rank 1: Actual gradient aggregation time: 0.119193 +MPI Rank 1: 05/03/2016 14:48:43: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.94976539 * 9216; EvalErrorPrediction = 0.53884549 * 9216; time = 1.5094s; samplesPerSecond = 6105.7 +MPI Rank 1: Async gradient aggregation wait time: 0.073365 +MPI Rank 1: Actual gradient aggregation time: 0.147889 +MPI Rank 1: Async gradient aggregation wait time: 0.004563 +MPI Rank 1: Actual gradient aggregation time: 0.151632 +MPI Rank 1: 05/03/2016 14:48:44: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91243517 * 10240; EvalErrorPrediction = 0.52392578 * 10240; time = 1.5042s; samplesPerSecond = 6807.5 +MPI Rank 1: Async gradient aggregation wait time: 0.012138 +MPI Rank 1: 05/03/2016 14:48:44: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.92947846 * 20480; EvalErrorPrediction = 0.53051758 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.06709s +MPI Rank 1: 05/03/2016 14:48:44: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:53: Action "train" complete. +MPI Rank 1: 05/03/2016 14:48:44: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 15:53:53: __COMPLETED__ -MPI Rank 2: 05/03/2016 15:53:27: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank2 -MPI Rank 2: 05/03/2016 15:53:27: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 15:53:27: Build info: +MPI Rank 1: 05/03/2016 14:48:44: __COMPLETED__ +MPI Rank 2: 05/03/2016 14:48:18: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank2 +MPI Rank 2: 05/03/2016 14:48:18: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:48:18: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:27: Built time: May 3 2016 13:15:46 -MPI Rank 2: 05/03/2016 15:53:27: Last modified date: Tue Apr 26 23:35:31 2016 -MPI Rank 2: 05/03/2016 15:53:27: Build type: Release -MPI Rank 2: 05/03/2016 15:53:27: Build target: GPU -MPI Rank 2: 05/03/2016 15:53:27: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 15:53:27: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 05/03/2016 15:53:27: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 2: 05/03/2016 15:53:27: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 05/03/2016 15:53:27: Build Branch: HEAD -MPI Rank 2: 05/03/2016 15:53:27: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 2: 05/03/2016 15:53:27: Built by svcphil on cntk-muc01 -MPI Rank 2: 05/03/2016 15:53:27: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 05/03/2016 15:53:27: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:48:18: Built time: May 3 2016 13:15:46 +MPI Rank 2: 05/03/2016 14:48:18: Last modified date: Tue Apr 26 23:35:31 2016 +MPI Rank 2: 05/03/2016 14:48:18: Build type: Release +MPI Rank 2: 05/03/2016 14:48:18: Build target: GPU +MPI Rank 2: 05/03/2016 14:48:18: With 1bit-SGD: no +MPI Rank 2: 05/03/2016 14:48:18: CUDA_PATH: 
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 2: 05/03/2016 14:48:18: CUB_PATH: c:\src\cub-1.4.1 +MPI Rank 2: 05/03/2016 14:48:18: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 2: 05/03/2016 14:48:18: Build Branch: HEAD +MPI Rank 2: 05/03/2016 14:48:18: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 2: 05/03/2016 14:48:18: Built by svcphil on cntk-muc01 +MPI Rank 2: 05/03/2016 14:48:18: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 2: 05/03/2016 14:48:18: ------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:27: Running on cntk-muc01 at 2016/05/03 15:53:27 -MPI Rank 2: 05/03/2016 15:53:27: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: 05/03/2016 14:48:18: Running on cntk-muc01 at 2016/05/03 14:48:18 +MPI Rank 2: 05/03/2016 14:48:18: Command line: +MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:27: 
>>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 15:53:27: precision = "float" +MPI Rank 2: 05/03/2016 14:48:18: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:48:18: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1397,14 +1399,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -1412,18 +1412,18 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:27: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:48:18: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:27: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 15:53:27: precision = "float" +MPI Rank 2: 05/03/2016 14:48:18: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:48:18: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = 0 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: 
SimpleNetworkBuilder = [ @@ -1507,14 +1507,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] @@ -1522,24 +1520,24 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:27: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:48:18: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:27: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:48:18: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:deviceId=0 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: 
configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1621,35 +1619,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 15:53:27: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 15:53:27: Commands: speechTrain -MPI Rank 2: 05/03/2016 15:53:27: Precision = "double" -MPI Rank 2: 05/03/2016 15:53:27: Using 2 CPU threads. -MPI Rank 2: 05/03/2016 15:53:27: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\ExperimentalHtkmlfReader\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 15:53:27: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 2: 05/03/2016 15:53:27: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 2: 05/03/2016 14:48:18: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:48:18: Commands: speechTrain +MPI Rank 2: 05/03/2016 14:48:18: Precision = "double" +MPI Rank 2: 05/03/2016 14:48:18: Using 2 CPU threads. 
+MPI Rank 2: 05/03/2016 14:48:18: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132225.174972\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 14:48:18: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 2: 05/03/2016 14:48:18: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:27: ############################################################################## -MPI Rank 2: 05/03/2016 15:53:27: # # -MPI Rank 2: 05/03/2016 15:53:27: # Action "train" # -MPI Rank 2: 05/03/2016 15:53:27: # # -MPI Rank 2: 05/03/2016 15:53:27: ############################################################################## +MPI Rank 2: 05/03/2016 14:48:18: ############################################################################## +MPI Rank 2: 05/03/2016 14:48:18: # # +MPI Rank 2: 05/03/2016 14:48:18: # Action "train" # +MPI Rank 2: 05/03/2016 14:48:18: # # +MPI Rank 2: 05/03/2016 14:48:18: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:27: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 14:48:18: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:27: Creating virgin network. +MPI Rank 2: 05/03/2016 14:48:18: Creating virgin network. MPI Rank 2: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 2: MPI Rank 2: Post-processing network... @@ -1702,14 +1699,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:28: Created model with 25 nodes on GPU 0. +MPI Rank 2: 05/03/2016 14:48:19: Created model with 25 nodes on GPU 0. 
MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:28: Training criterion node(s): -MPI Rank 2: 05/03/2016 15:53:28: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 14:48:19: Training criterion node(s): +MPI Rank 2: 05/03/2016 14:48:19: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:28: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 14:48:19: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:28: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 14:48:19: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1717,160 +1714,166 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0000009297A57450: {[features Value[363 x *]] } -MPI Rank 2: 00000092B92D3EC0: {[W1 Value[512 x 512]] } -MPI Rank 2: 00000092B92D3F60: {[B1 Value[512 x 1]] } -MPI Rank 2: 00000092B92D4320: {[W2 Value[132 x 512]] } -MPI Rank 2: 00000092B92D43C0: {[labels Value[132 x *]] } -MPI Rank 2: 00000092B92D4460: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 00000092B92D45A0: {[W0 Value[512 x 363]] } -MPI Rank 2: 00000092B92D46E0: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 00000092B92D4B40: {[B0 Value[512 x 1]] } -MPI Rank 2: 00000092B92D5720: {[B2 Value[132 x 1]] } -MPI Rank 2: 00000092B9C5DAF0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 00000092B9C5DB90: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 00000092B9C5DCD0: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 00000092B9C5DE10: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 00000092B9C5E090: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 00000092B9C5E450: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 00000092B9C5E4F0: {[LogOfPrior Value[132]] } -MPI Rank 2: 00000092B9C5E590: {[W0*features Value[512 x *]] } -MPI Rank 2: 00000092B9C5E8B0: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 00000092B9C5E9F0: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 00000092B9C5EA90: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 00000092B9C5EB30: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 00000092B9C5EC70: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 00000092B9C5EE50: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 00000092B9C5F210: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 00000092B9C5F2B0: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 00000092B9C5F3F0: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 00000092B9C5F490: {[Prior Value[132]] } +MPI Rank 2: 000000C91E36FDB0: {[features Value[363 x *]] } +MPI Rank 2: 000000C93E6AFF50: {[W0 Value[512 x 363]] } +MPI Rank 2: 000000C93E6B0270: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 000000C93E6B04F0: {[B0 Value[512 x 1]] } +MPI Rank 2: 000000C93E6B09F0: {[W1 Value[512 x 512]] } +MPI Rank 2: 000000C93E6B0B30: {[B1 Value[512 x 1]] } +MPI 
Rank 2: 000000C93E6B1030: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 000000C93E6B17B0: {[W2 Value[132 x 512]] } +MPI Rank 2: 000000C93EFEA4B0: {[B2 Value[132 x 1]] } +MPI Rank 2: 000000C93EFEA5F0: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 000000C93EFEA690: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 000000C93EFEA7D0: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 000000C93EFEA870: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 000000C93EFEA910: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 000000C93EFEAAF0: {[labels Value[132 x *]] } +MPI Rank 2: 000000C93EFEAC30: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 000000C93EFEAD70: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 000000C93EFEAEB0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 000000C93EFEAF50: {[W0*features Value[512 x *]] } +MPI Rank 2: 000000C93EFEB130: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 000000C93EFEB1D0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 000000C93EFEB270: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 000000C93EFEB4F0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 000000C93EFEB630: {[LogOfPrior Value[132]] } +MPI Rank 2: 000000C93EFEB6D0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 000000C93EFEB770: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 000000C93EFEBC70: {[Prior Value[132]] } +MPI Rank 2: 000000C93EFEBEF0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:28: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 14:48:19: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:28: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 15:53:28: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 15:53:28: Prior = Mean() +MPI Rank 2: 05/03/2016 14:48:19: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 14:48:19: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 14:48:19: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:32: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 14:48:24: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 14:48:24: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:32: Starting minibatch loop. 
-MPI Rank 2: 05/03/2016 15:53:33: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3264s; samplesPerSecond = 1960.6 -MPI Rank 2: 05/03/2016 15:53:33: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3126s; samplesPerSecond = 2047.3 -MPI Rank 2: 05/03/2016 15:53:33: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3134s; samplesPerSecond = 2042.2 -MPI Rank 2: 05/03/2016 15:53:33: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3152s; samplesPerSecond = 2030.1 -MPI Rank 2: 05/03/2016 15:53:34: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3125s; samplesPerSecond = 2047.8 -MPI Rank 2: 05/03/2016 15:53:34: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3139s; samplesPerSecond = 2039.1 -MPI Rank 2: 05/03/2016 15:53:34: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3028s; samplesPerSecond = 2113.4 -MPI Rank 2: 05/03/2016 15:53:35: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3347s; samplesPerSecond = 1912.4 -MPI Rank 2: 05/03/2016 15:53:35: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3136s; samplesPerSecond = 2041.0 -MPI Rank 2: 05/03/2016 15:53:35: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3124s; samplesPerSecond = 2048.6 -MPI Rank 2: 05/03/2016 15:53:36: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3126s; samplesPerSecond = 2047.3 -MPI Rank 2: 05/03/2016 15:53:36: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3138s; samplesPerSecond = 2039.8 -MPI Rank 2: 05/03/2016 15:53:36: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3129s; samplesPerSecond = 2045.6 -MPI Rank 2: 05/03/2016 15:53:37: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.2919s; samplesPerSecond = 2192.6 -MPI Rank 2: 05/03/2016 15:53:37: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3133s; samplesPerSecond = 2042.5 -MPI Rank 2: 05/03/2016 15:53:37: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3028s; samplesPerSecond = 2113.4 -MPI Rank 2: 05/03/2016 15:53:38: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3231s; samplesPerSecond = 1980.6 -MPI Rank 2: 05/03/2016 15:53:38: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3349s; samplesPerSecond = 1911.0 -MPI Rank 2: 05/03/2016 15:53:38: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3341s; samplesPerSecond = 1915.4 -MPI Rank 2: 05/03/2016 15:53:39: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3135s; samplesPerSecond = 2041.5 -MPI Rank 2: 05/03/2016 15:53:39: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3129s; samplesPerSecond = 2045.4 -MPI Rank 2: 05/03/2016 15:53:39: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3126s; samplesPerSecond = 2047.4 -MPI Rank 2: 05/03/2016 15:53:39: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3134s; samplesPerSecond = 2042.0 -MPI Rank 2: 05/03/2016 15:53:40: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3140s; samplesPerSecond = 2038.2 -MPI Rank 2: 05/03/2016 15:53:40: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.2927s; samplesPerSecond = 2186.9 -MPI Rank 2: 05/03/2016 15:53:40: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3140s; samplesPerSecond = 2038.1 -MPI Rank 2: 05/03/2016 15:53:41: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3143s; samplesPerSecond = 2036.3 -MPI Rank 2: 05/03/2016 15:53:41: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3135s; samplesPerSecond = 2041.3 -MPI Rank 2: 05/03/2016 15:53:41: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3340s; samplesPerSecond = 1916.0 -MPI Rank 2: 05/03/2016 15:53:42: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3139s; samplesPerSecond = 2038.9 -MPI Rank 2: 05/03/2016 15:53:42: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3132s; samplesPerSecond = 2043.2 -MPI Rank 2: 05/03/2016 15:53:42: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3132s; samplesPerSecond = 2043.6 -MPI Rank 2: 05/03/2016 15:53:42: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.153s +MPI Rank 2: 05/03/2016 14:48:24: Starting minibatch loop. 
+MPI Rank 2: 05/03/2016 14:48:24: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3186s; samplesPerSecond = 2008.8 +MPI Rank 2: 05/03/2016 14:48:24: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3131s; samplesPerSecond = 2044.2 +MPI Rank 2: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3135s; samplesPerSecond = 2041.6 +MPI Rank 2: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3126s; samplesPerSecond = 2047.2 +MPI Rank 2: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3128s; samplesPerSecond = 2046.2 +MPI Rank 2: 05/03/2016 14:48:25: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3232s; samplesPerSecond = 1980.2 +MPI Rank 2: 05/03/2016 14:48:26: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3044s; samplesPerSecond = 2102.3 +MPI Rank 2: 05/03/2016 14:48:26: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3118s; samplesPerSecond = 2052.4 +MPI Rank 2: 05/03/2016 14:48:26: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3139s; samplesPerSecond = 2039.0 +MPI Rank 2: 05/03/2016 14:48:27: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3146s; samplesPerSecond = 2034.5 +MPI Rank 2: 05/03/2016 14:48:27: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3117s; samplesPerSecond = 2053.1 +MPI Rank 2: 05/03/2016 14:48:27: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3039s; samplesPerSecond = 2105.9 +MPI Rank 2: 05/03/2016 14:48:28: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3147s; samplesPerSecond = 2033.9 +MPI Rank 2: 05/03/2016 14:48:28: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3345s; samplesPerSecond = 1913.5 +MPI Rank 2: 05/03/2016 14:48:28: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.2924s; samplesPerSecond = 2188.9 +MPI Rank 2: 05/03/2016 14:48:29: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3352s; samplesPerSecond = 1909.3 +MPI Rank 2: 05/03/2016 14:48:29: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3042s; samplesPerSecond = 2104.1 +MPI Rank 2: 05/03/2016 14:48:29: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3131s; samplesPerSecond = 2044.2 +MPI Rank 2: 05/03/2016 14:48:30: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3137s; samplesPerSecond = 2040.2 +MPI Rank 2: 05/03/2016 14:48:30: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3138s; samplesPerSecond = 2039.6 +MPI Rank 2: 05/03/2016 14:48:30: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3151s; samplesPerSecond = 2030.8 +MPI Rank 2: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3151s; samplesPerSecond = 2031.0 +MPI Rank 2: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3146s; samplesPerSecond = 2034.6 +MPI Rank 2: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3148s; samplesPerSecond = 2033.3 +MPI Rank 2: 05/03/2016 14:48:31: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3136s; samplesPerSecond = 2040.9 +MPI Rank 2: 05/03/2016 14:48:32: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3147s; samplesPerSecond = 2033.7 +MPI Rank 2: 05/03/2016 14:48:32: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3146s; samplesPerSecond = 2034.3 +MPI Rank 2: 05/03/2016 14:48:32: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3150s; samplesPerSecond = 2031.8 +MPI Rank 2: 05/03/2016 14:48:33: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3148s; samplesPerSecond = 2032.8 +MPI Rank 2: 05/03/2016 14:48:33: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3146s; samplesPerSecond = 2034.1 +MPI Rank 2: 05/03/2016 14:48:33: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3134s; samplesPerSecond = 2042.0 +MPI Rank 2: 05/03/2016 14:48:34: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3137s; samplesPerSecond = 2039.9 +MPI Rank 2: 05/03/2016 14:48:34: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.063s MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:43: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 14:48:34: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:43: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.078116 -MPI Rank 2: Async gradient aggregation wait time: 0.023088 -MPI Rank 2: Actual gradient aggregation time: 0.049965 -MPI Rank 2: 05/03/2016 15:53:43: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14557030 * 2304; EvalErrorPrediction = 0.57812500 * 2304; time = 0.5275s; samplesPerSecond = 4367.7 +MPI Rank 2: 05/03/2016 14:48:34: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Actual gradient aggregation time: 0.054642 +MPI Rank 2: Async gradient aggregation wait time: 0.013826 +MPI Rank 2: Actual gradient aggregation time: 0.051527 +MPI Rank 2: 05/03/2016 14:48:34: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14557030 * 2304; EvalErrorPrediction = 0.57812500 * 2304; time = 0.5220s; samplesPerSecond = 4413.5 MPI Rank 2: Async gradient aggregation wait time: 0 -MPI Rank 2: Actual gradient aggregation time: 0.047654 -MPI Rank 2: Async gradient aggregation wait time: 0.01245 -MPI Rank 2: Actual gradient aggregation time: 0.048137 -MPI Rank 2: 05/03/2016 15:53:44: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08093589 * 2560; EvalErrorPrediction = 0.57773438 * 2560; time = 0.5018s; samplesPerSecond = 5102.1 -MPI Rank 2: Async gradient aggregation wait time: 0.004654 -MPI Rank 2: Actual gradient aggregation time: 0.049697 -MPI Rank 2: Async gradient aggregation wait time: 0.004186 -MPI Rank 2: Actual gradient aggregation time: 0.048025 -MPI Rank 2: 05/03/2016 15:53:44: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06764732 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5034s; samplesPerSecond = 5085.7 -MPI Rank 2: Async gradient aggregation wait time: 0.00447 -MPI Rank 2: Actual gradient aggregation time: 0.051188 -MPI Rank 2: Async gradient aggregation wait time: 0.004704 -MPI Rank 2: Actual gradient aggregation time: 0.048468 -MPI Rank 2: 05/03/2016 15:53:45: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.02760774 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.5073s; samplesPerSecond = 5046.2 -MPI Rank 2: Async gradient aggregation wait time: 0.00423 -MPI Rank 2: Actual gradient aggregation time: 0.047963 -MPI Rank 2: Async gradient aggregation wait time: 0.004812 -MPI Rank 2: Actual gradient aggregation time: 0.049242 -MPI Rank 2: 05/03/2016 15:53:45: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97300714 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 0.5023s; samplesPerSecond = 5096.2 -MPI Rank 2: Async gradient aggregation wait time: 0.004478 -MPI Rank 2: Actual gradient aggregation time: 0.042995 -MPI Rank 2: Async gradient aggregation wait time: 0.026138 -MPI Rank 2: Actual gradient aggregation time: 0.049 -MPI Rank 2: 05/03/2016 15:53:46: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07035181 * 2560; EvalErrorPrediction = 0.56835938 * 2560; time = 0.5040s; samplesPerSecond = 5079.2 -MPI Rank 2: Async gradient aggregation wait time: 
0.004968 -MPI Rank 2: Actual gradient aggregation time: 0.049203 -MPI Rank 2: Async gradient aggregation wait time: 0.01357 -MPI Rank 2: Actual gradient aggregation time: 0.048717 -MPI Rank 2: 05/03/2016 15:53:46: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02989276 * 2560; EvalErrorPrediction = 0.56132812 * 2560; time = 0.5061s; samplesPerSecond = 5058.1 -MPI Rank 2: Async gradient aggregation wait time: 0.003402 -MPI Rank 2: Actual gradient aggregation time: 0.050351 -MPI Rank 2: Async gradient aggregation wait time: 0.006671 -MPI Rank 2: Actual gradient aggregation time: 0.050432 -MPI Rank 2: 05/03/2016 15:53:47: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18157839 * 2560; EvalErrorPrediction = 0.61757812 * 2560; time = 0.5060s; samplesPerSecond = 5059.1 -MPI Rank 2: Async gradient aggregation wait time: 0.010579 -MPI Rank 2: Actual gradient aggregation time: 0.013008 -MPI Rank 2: 05/03/2016 15:53:47: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07254235 * 20480; EvalErrorPrediction = 0.57407227 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.08938s +MPI Rank 2: Actual gradient aggregation time: 0.046809 +MPI Rank 2: Async gradient aggregation wait time: 0.012005 +MPI Rank 2: Actual gradient aggregation time: 0.047804 +MPI Rank 2: 05/03/2016 14:48:35: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08093589 * 2560; EvalErrorPrediction = 0.57773438 * 2560; time = 0.5049s; samplesPerSecond = 5070.3 +MPI Rank 2: Async gradient aggregation wait time: 0.003799 +MPI Rank 2: Actual gradient aggregation time: 0.049903 +MPI Rank 2: Async gradient aggregation wait time: 0.004714 +MPI Rank 2: Actual gradient aggregation time: 0.046878 +MPI Rank 2: 05/03/2016 14:48:35: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.06764732 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5093s; samplesPerSecond = 5026.0 +MPI Rank 2: Async gradient aggregation wait time: 0.004391 +MPI Rank 2: Actual gradient aggregation time: 0.051294 +MPI Rank 2: Async gradient aggregation wait time: 0.00459 +MPI Rank 2: Actual gradient aggregation time: 0.049091 +MPI Rank 2: 05/03/2016 14:48:36: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.02760774 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.5102s; samplesPerSecond = 5017.8 +MPI Rank 2: Async gradient aggregation wait time: 0.012957 +MPI Rank 2: Actual gradient aggregation time: 0.049478 +MPI Rank 2: Async gradient aggregation wait time: 0.018159 +MPI Rank 2: Actual gradient aggregation time: 0.047512 +MPI Rank 2: 05/03/2016 14:48:36: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97300714 * 2560; EvalErrorPrediction = 0.55156250 * 2560; time = 0.5073s; samplesPerSecond = 5046.3 +MPI Rank 2: Async gradient aggregation wait time: 0.021545 +MPI Rank 2: Actual gradient aggregation time: 0.044026 +MPI Rank 2: Async gradient aggregation wait time: 0.004574 +MPI Rank 2: Actual gradient aggregation time: 0.050478 +MPI Rank 2: 05/03/2016 14:48:37: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.07035181 * 2560; EvalErrorPrediction = 0.56835938 * 2560; time = 0.5117s; samplesPerSecond = 5002.9 +MPI Rank 2: Async gradient aggregation wait time: 0.003897 +MPI Rank 2: Actual gradient aggregation time: 0.050207 +MPI Rank 2: Async gradient aggregation wait time: 0.011774 +MPI Rank 2: Actual gradient aggregation time: 0.050448 +MPI Rank 2: 
05/03/2016 14:48:37: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.02989276 * 2560; EvalErrorPrediction = 0.56132812 * 2560; time = 0.5086s; samplesPerSecond = 5033.2 +MPI Rank 2: Async gradient aggregation wait time: 0.004104 +MPI Rank 2: Actual gradient aggregation time: 0.051338 +MPI Rank 2: Async gradient aggregation wait time: 0.004425 +MPI Rank 2: Actual gradient aggregation time: 0.048588 +MPI Rank 2: 05/03/2016 14:48:38: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.18157839 * 2560; EvalErrorPrediction = 0.61757812 * 2560; time = 0.5114s; samplesPerSecond = 5005.6 +MPI Rank 2: Async gradient aggregation wait time: 0.011451 +MPI Rank 2: Actual gradient aggregation time: 0.012828 +MPI Rank 2: 05/03/2016 14:48:38: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.07254235 * 20480; EvalErrorPrediction = 0.57407227 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.11638s MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:47: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:48:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:47: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.060128 -MPI Rank 2: Actual gradient aggregation time: 0.14939 +MPI Rank 2: 05/03/2016 14:48:38: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 2: Async gradient aggregation wait time: 0.117552 +MPI Rank 2: Actual gradient aggregation time: 0.153057 MPI Rank 2: Async gradient aggregation wait time: 1e-006 -MPI Rank 2: Actual gradient aggregation time: 0.041366 -MPI Rank 2: 05/03/2016 15:53:48: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.10460850 * 9216; EvalErrorPrediction = 0.56998698 * 9216; time = 1.5351s; samplesPerSecond = 6003.7 -MPI Rank 2: Async gradient aggregation wait time: 0.05168 -MPI Rank 2: Actual gradient aggregation time: 0.149633 -MPI Rank 2: Async gradient aggregation wait time: 0.003765 -MPI Rank 2: Actual gradient aggregation time: 0.150498 -MPI Rank 2: 05/03/2016 15:53:50: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.06084998 * 10240; EvalErrorPrediction = 0.55947266 * 10240; time = 1.5250s; samplesPerSecond = 6714.9 -MPI Rank 2: 05/03/2016 15:53:50: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.07685066 * 20480; EvalErrorPrediction = 0.56367188 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.09253s +MPI Rank 2: Actual gradient aggregation time: 0.041514 +MPI Rank 2: 05/03/2016 14:48:39: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.10460850 * 9216; EvalErrorPrediction = 0.56998698 * 9216; time = 1.5443s; samplesPerSecond = 5967.7 +MPI Rank 2: Async gradient aggregation wait time: 0.096403 +MPI Rank 2: Actual gradient aggregation time: 0.150679 +MPI Rank 2: Async gradient aggregation wait time: 0.004018 +MPI Rank 2: Actual gradient aggregation time: 0.148604 +MPI Rank 2: 05/03/2016 14:48:41: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.06084998 * 10240; EvalErrorPrediction = 0.55947266 * 10240; time = 1.5313s; samplesPerSecond = 6687.1 +MPI Rank 2: 05/03/2016 14:48:41: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.07685066 * 20480; EvalErrorPrediction = 0.56367188 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.10491s MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:50: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:48:41: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:50: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 2: Async gradient aggregation wait time: 0.00534 -MPI Rank 2: Actual gradient aggregation time: 0.150318 -MPI Rank 2: Async gradient aggregation wait time: 0.029396 -MPI Rank 2: Actual gradient aggregation time: 0.127074 -MPI Rank 2: 05/03/2016 15:53:52: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.94976539 * 9216; EvalErrorPrediction = 0.53884549 * 9216; time = 1.5004s; samplesPerSecond = 6142.4 -MPI Rank 2: Async gradient aggregation wait time: 0.004608 -MPI Rank 2: Actual gradient aggregation time: 0.149284 -MPI Rank 2: Async gradient aggregation wait time: 0.004078 -MPI Rank 2: Actual gradient aggregation time: 0.151081 -MPI Rank 2: 05/03/2016 15:53:53: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91243517 * 10240; EvalErrorPrediction = 0.52392578 * 10240; time = 1.5222s; samplesPerSecond = 6727.0 -MPI Rank 2: Async gradient aggregation wait time: 0.012705 -MPI Rank 2: 05/03/2016 15:53:53: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.92947846 * 20480; EvalErrorPrediction = 0.53051758 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.05713s -MPI Rank 2: 05/03/2016 15:53:53: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 14:48:41: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Async gradient aggregation wait time: 0.035147 +MPI Rank 2: Actual gradient aggregation time: 0.15211 +MPI Rank 2: Async gradient aggregation wait time: 0.003794 +MPI Rank 2: Actual gradient aggregation time: 0.122389 +MPI Rank 2: 05/03/2016 14:48:43: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.94976539 * 9216; EvalErrorPrediction = 0.53884549 * 9216; time = 1.5101s; samplesPerSecond = 6102.9 +MPI Rank 2: Async gradient aggregation wait time: 0.004856 +MPI Rank 2: Actual gradient aggregation time: 0.149663 +MPI Rank 2: Async gradient aggregation wait time: 0.004215 +MPI Rank 2: Actual gradient aggregation time: 0.153561 +MPI Rank 2: 05/03/2016 14:48:44: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91243517 * 10240; EvalErrorPrediction = 0.52392578 * 10240; time = 1.5329s; samplesPerSecond = 6680.1 +MPI Rank 2: Async gradient aggregation wait time: 0.012013 +MPI Rank 2: 05/03/2016 14:48:44: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.92947846 * 20480; EvalErrorPrediction = 0.53051758 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.06659s +MPI Rank 2: 05/03/2016 14:48:44: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 15:53:53: Action "train" complete. +MPI Rank 2: 05/03/2016 14:48:44: Action "train" complete. 
 MPI Rank 2: 
-MPI Rank 2: 05/03/2016 15:53:53: __COMPLETED__
\ No newline at end of file
+MPI Rank 2: 05/03/2016 14:48:44: __COMPLETED__
\ No newline at end of file
diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/run-test b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/run-test
index a981eabf3..2f8b474bf 100755
--- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/run-test
+++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelBufferedAsyncGradientAggregation/run-test
@@ -8,6 +8,12 @@ LogFileName=stderr
 Instances=3
 NumCPUThreads=$(threadsPerInstance $Instances)
 
+(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -)
+if [ $? != 0 ]; then
+ echo Error: Baselines must match original test. Copy from $OriginalTestDir.
+ exit 1
+fi
+
 # cntkmpirun
 cntkmpirun "-n $Instances" cntk.cntk "speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=$NumCPUThreads precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]]"
 ExitCode=$?
diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.cpu.txt
index 9602bd40f..5c3f4526d 100644
--- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.cpu.txt
@@ -1,4 +1,4 @@
-=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr
+=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr ------------------------------------------------------------------- Build info: @@ -34,8 +34,8 @@ Build info: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux ------------------------------------------------------------------- Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPIWrapper: initializing MPI +Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPIWrapper: initializing MPI ------------------------------------------------------------------- Build info: @@ -57,7 +57,7 @@ Build info: Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPIWrapper: initializing MPI -------------------------------------------------------------------------- -[[62948,1],0]: A high-performance Open MPI point-to-point messaging module +[[62904,1],1]: A high-performance Open MPI point-to-point messaging module was unable to find any relevant network interfaces: Module: OpenFabrics (openib) @@ -72,31 +72,31 @@ ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 0 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: all 3 nodes responded -05/03/2016 18:06:55: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank0 ping [requestnodes (before change)]: all 3 nodes responded +ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 1 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other +ping [requestnodes (after change)]: all 3 nodes responded +mpihelper: we are cog 0 in a gearbox of 3 +ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded -05/03/2016 18:06:56: Redirecting stderr to file 
/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank1 -05/03/2016 18:06:56: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank2 -[870bdeb651b9:30024] 2 more processes have sent help message help-mpi-btl-base.txt / btl:no-nics -[870bdeb651b9:30024] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages +ping [mpihelper]: all 3 nodes responded +ping [mpihelper]: all 3 nodes responded +05/03/2016 18:06:15: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank0 +05/03/2016 18:06:15: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank1 +05/03/2016 18:06:16: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank2 +[870bdeb651b9:29972] 2 more processes have sent help message help-mpi-btl-base.txt / btl:no-nics +[870bdeb651b9:29972] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages -------------------------------------------------------------------------- -mpiexec has exited due to process rank 2 with PID 30028 on +mpiexec has exited due to process rank 0 with PID 29974 on node 870bdeb651b9 exiting improperly. There are three reasons this could occur: 1. this process did not call "init" before exiting, but others in @@ -119,32 +119,32 @@ terminated by signals sent by mpiexec (as reported here). You can avoid this message by specifying -quiet on the mpiexec command line. 
-------------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:06:55: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:06:55: Build info: +MPI Rank 0: 05/03/2016 18:06:15: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:06:15: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: Built time: May 3 2016 17:56:15 -MPI Rank 0: 05/03/2016 18:06:55: Last modified date: Tue May 3 11:36:22 2016 -MPI Rank 0: 05/03/2016 18:06:55: Build type: release -MPI Rank 0: 05/03/2016 18:06:55: Build target: GPU -MPI Rank 0: 05/03/2016 18:06:55: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 18:06:55: Math lib: acml -MPI Rank 0: 05/03/2016 18:06:55: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 0: 05/03/2016 18:06:55: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 0: 05/03/2016 18:06:55: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 0: 05/03/2016 18:06:55: Build Branch: HEAD -MPI Rank 0: 05/03/2016 18:06:55: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 0: 05/03/2016 18:06:55: Built by philly on 18750d26eb32 -MPI Rank 0: 05/03/2016 18:06:55: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 0: 05/03/2016 18:06:55: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:06:15: Built time: May 3 2016 17:56:15 +MPI Rank 0: 05/03/2016 18:06:15: Last modified date: Tue May 3 11:36:22 2016 +MPI Rank 0: 05/03/2016 18:06:15: Build type: release +MPI Rank 0: 05/03/2016 18:06:15: Build target: GPU +MPI Rank 0: 05/03/2016 18:06:15: With 1bit-SGD: no +MPI Rank 0: 05/03/2016 18:06:15: Math lib: acml +MPI Rank 0: 05/03/2016 18:06:15: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 0: 05/03/2016 18:06:15: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 0: 05/03/2016 18:06:15: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 0: 05/03/2016 18:06:15: Build Branch: HEAD +MPI Rank 0: 05/03/2016 18:06:15: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 0: 05/03/2016 18:06:15: Built by philly on 18750d26eb32 +MPI Rank 0: 05/03/2016 18:06:15: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 0: 05/03/2016 18:06:15: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: Running on localhost at 2016/05/03 18:06:55 -MPI Rank 0: 05/03/2016 18:06:55: Command line: -MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 0: 05/03/2016 18:06:15: Running 
on localhost at 2016/05/03 18:06:15 +MPI Rank 0: 05/03/2016 18:06:15: Command line: +MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:06:55: precision = "float" +MPI Rank 0: 05/03/2016 18:06:15: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:06:15: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -234,27 +234,25 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
+MPI Rank 0: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=1 -MPI Rank 0: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:06:15: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:06:55: precision = "float" +MPI Rank 0: 05/03/2016 18:06:15: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:06:15: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = -1 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -338,33 +336,31 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
+MPI Rank 0: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=1 -MPI Rank 0: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:06:15: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:06:15: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN +MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. MPI Rank 0: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:deviceId=-1 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=float -MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -446,35 +442,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 0: ] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 0: configparameters: 
cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 18:06:55: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 18:06:55: Commands: speechTrain -MPI Rank 0: 05/03/2016 18:06:55: Precision = "float" -MPI Rank 0: 05/03/2016 18:06:55: Using 1 CPU threads. -MPI Rank 0: 05/03/2016 18:06:55: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 18:06:55: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 0: 05/03/2016 18:06:55: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 0: 05/03/2016 18:06:15: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:06:15: Commands: speechTrain +MPI Rank 0: 05/03/2016 18:06:15: Precision = "float" +MPI Rank 0: 05/03/2016 18:06:15: Using 1 CPU threads. +MPI Rank 0: 05/03/2016 18:06:15: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 18:06:15: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 0: 05/03/2016 18:06:15: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: ############################################################################## -MPI Rank 0: 05/03/2016 18:06:55: # # -MPI Rank 0: 05/03/2016 18:06:55: # Action "train" # -MPI Rank 0: 05/03/2016 18:06:55: # # -MPI Rank 0: 05/03/2016 18:06:55: ############################################################################## +MPI Rank 0: 05/03/2016 18:06:15: ############################################################################## +MPI Rank 0: 05/03/2016 18:06:15: # # +MPI Rank 0: 05/03/2016 18:06:15: # Action "train" # +MPI Rank 0: 05/03/2016 18:06:15: # # +MPI Rank 0: 05/03/2016 18:06:15: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 18:06:15: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 0: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: Creating virgin network. +MPI Rank 0: 05/03/2016 18:06:15: Creating virgin network. MPI Rank 0: MPI Rank 0: Post-processing network... 
MPI Rank 0: @@ -526,14 +521,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: Created model with 25 nodes on CPU. +MPI Rank 0: 05/03/2016 18:06:15: Created model with 25 nodes on CPU. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: Training criterion node(s): -MPI Rank 0: 05/03/2016 18:06:55: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 18:06:15: Training criterion node(s): +MPI Rank 0: 05/03/2016 18:06:15: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 18:06:15: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 18:06:15: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -541,135 +536,140 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0x17c4938: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0x17c54f8: {[B0 Value[512 x 1]] } -MPI Rank 0: 0x17ceb88: {[features Value[363 x *]] } -MPI Rank 0: 0x17d3b48: {[B1 Value[512 x 1]] } -MPI Rank 0: 0x17e2068: {[B2 Value[132 x 1]] } -MPI Rank 0: 0x1803348: {[labels Value[132 x *]] } -MPI Rank 0: 0x1816a78: {[W1 Value[512 x 512]] } -MPI Rank 0: 0x1864088: {[W2 Value[132 x 512]] } -MPI Rank 0: 0x186e4c8: {[W0 Value[512 x 363]] } -MPI Rank 0: 0x18c94b8: {[W0*features Value[512 x *]] } -MPI Rank 0: 0x18c9678: {[LogOfPrior Value[132]] } -MPI Rank 0: 0x18d6388: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0x18d6548: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0x18d89b8: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0x18d8b48: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0x18d8d08: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0x18da838: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0x18da9f8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0x18e8eb8: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 0x18f7d58: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0x18f7eb8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0x18f8188: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0x18fd018: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0x18fd1d8: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 0x1900548: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0x1900708: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 0x19008c8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0x19034f8: {[Prior Value[132]] } +MPI Rank 0: 0x1839158: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 0x18397a8: {[features Value[363 x *]] } +MPI Rank 0: 0x18d3558: {[LogOfPrior Value[132]] } +MPI Rank 0: 0x18d42f8: {[W0*features Value[512 x *]] } +MPI Rank 0: 
0x18d4508: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 0x18d46c8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 0x18d5908: {[W1 Value[512 x 512]] } +MPI Rank 0: 0x18e2c88: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 0x18ed068: {[B1 Value[512 x 1]] } +MPI Rank 0: 0x1938a48: {[B0 Value[512 x 1]] } +MPI Rank 0: 0x193d6a8: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 0x1947a28: {[W0 Value[512 x 363]] } +MPI Rank 0: 0x194cb18: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 0x194ccd8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 0x194ce98: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 0x194d058: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 0x194d218: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 0x1954428: {[B2 Value[132 x 1]] } +MPI Rank 0: 0x1955e48: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 0x1956008: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 0x19561c8: {[B2 Gradient[132 x 1]] } +MPI Rank 0: 0x1958448: {[W2 Value[132 x 512]] } +MPI Rank 0: 0x195ac38: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 0x195c838: {[Prior Value[132]] } +MPI Rank 0: 0x195d838: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 0x195d998: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 0x195db58: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 0x195e318: {[labels Value[132 x *]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 18:06:15: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:55: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 18:06:55: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 18:06:55: Prior = Mean() +MPI Rank 0: 05/03/2016 18:06:15: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 18:06:15: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 18:06:15: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:57: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 18:06:16: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:59: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 18:06:18: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:06:59: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 18:06:59: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.39181891 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.8691s; samplesPerSecond = 736.4 -MPI Rank 0: 05/03/2016 18:07:00: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.16675421 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.4325s; samplesPerSecond = 1479.6 -MPI Rank 0: 05/03/2016 18:07:00: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98684059 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3175s; samplesPerSecond = 2015.7 -MPI Rank 0: 05/03/2016 18:07:01: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86595333 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.4235s; samplesPerSecond = 1511.1 -MPI Rank 0: 05/03/2016 18:07:01: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.81007105 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.4066s; samplesPerSecond = 1574.1 -MPI Rank 0: 05/03/2016 18:07:02: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73428214 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.6594s; samplesPerSecond = 970.6 -MPI Rank 0: 05/03/2016 18:07:02: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57475346 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.4954s; samplesPerSecond = 1291.9 -MPI Rank 0: 05/03/2016 18:07:03: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.43591998 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.2985s; samplesPerSecond = 2143.9 -MPI Rank 0: 05/03/2016 18:07:03: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36042336 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3541s; samplesPerSecond = 1807.6 -MPI Rank 0: 05/03/2016 18:07:03: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39384191 * 640; EvalErrorPrediction = 0.85156250 * 640; time = 0.4079s; samplesPerSecond = 1568.9 -MPI Rank 0: 05/03/2016 18:07:04: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25078527 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.4834s; samplesPerSecond = 1324.1 -MPI Rank 0: 05/03/2016 18:07:04: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35324790 * 640; EvalErrorPrediction = 0.79375000 * 640; time = 0.2926s; samplesPerSecond = 2187.1 -MPI Rank 0: 05/03/2016 18:07:04: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19606895 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.2755s; samplesPerSecond = 2323.4 -MPI Rank 0: 05/03/2016 18:07:05: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06110651 * 640; EvalErrorPrediction = 0.73125000 * 640; time = 0.4523s; samplesPerSecond = 1415.0 -MPI Rank 0: 05/03/2016 18:07:05: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05118547 * 640; EvalErrorPrediction = 0.75625000 * 640; time = 0.3081s; samplesPerSecond = 2077.0 -MPI Rank 0: 05/03/2016 18:07:05: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02474034 * 640; EvalErrorPrediction = 0.74062500 * 640; time = 0.4132s; samplesPerSecond = 1549.0 -MPI Rank 0: 05/03/2016 18:07:06: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89903187 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.3466s; samplesPerSecond = 1846.6 -MPI Rank 0: 05/03/2016 18:07:06: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.75173292 * 640; EvalErrorPrediction = 0.68125000 * 640; time = 0.4640s; samplesPerSecond = 1379.3 -MPI Rank 0: 05/03/2016 18:07:07: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83969107 * 640; EvalErrorPrediction = 0.71875000 * 640; time = 0.4517s; samplesPerSecond = 1416.9 -MPI Rank 0: 05/03/2016 18:07:07: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62870412 * 640; EvalErrorPrediction = 0.65468750 * 640; time = 0.6186s; samplesPerSecond = 1034.7 -MPI Rank 0: 05/03/2016 18:07:08: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.66655150 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 1.0821s; samplesPerSecond = 591.4 -MPI Rank 0: 05/03/2016 18:07:09: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61327720 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.5661s; samplesPerSecond = 1130.6 -MPI Rank 0: 05/03/2016 18:07:10: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53100193 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.5765s; samplesPerSecond = 1110.1 -MPI Rank 0: 05/03/2016 18:07:10: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.43748447 * 640; EvalErrorPrediction = 0.64375000 * 640; time = 0.6978s; samplesPerSecond = 917.2 -MPI Rank 0: 05/03/2016 18:07:11: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.41106807 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 1.1291s; samplesPerSecond = 566.8 -MPI Rank 0: 05/03/2016 18:07:12: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48898886 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.6469s; samplesPerSecond = 989.3 -MPI Rank 0: 05/03/2016 18:07:13: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34965819 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.6568s; samplesPerSecond = 974.4 -MPI Rank 0: 05/03/2016 18:07:13: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.23707549 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.3436s; samplesPerSecond = 1862.9 -MPI Rank 0: 05/03/2016 18:07:14: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.33135317 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.9962s; samplesPerSecond = 642.4 -MPI Rank 0: 05/03/2016 18:07:15: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21606912 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.8790s; samplesPerSecond = 728.1 -MPI Rank 0: 05/03/2016 18:07:15: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29109817 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.4021s; samplesPerSecond = 1591.5 -MPI Rank 0: 05/03/2016 18:07:16: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.20535403 * 640; EvalErrorPrediction = 0.57500000 * 640; time = 0.3101s; samplesPerSecond = 2063.6 -MPI Rank 0: 05/03/2016 18:07:16: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01737292 * 20480; EvalErrorPrediction = 0.73061523 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=17.1612s -MPI Rank 0: 05/03/2016 18:07:16: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 18:06:18: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 
32), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 18:06:18: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.39181891 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.2686s; samplesPerSecond = 2382.8 +MPI Rank 0: 05/03/2016 18:06:18: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.16675421 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.1977s; samplesPerSecond = 3238.0 +MPI Rank 0: 05/03/2016 18:06:18: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98684059 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3989s; samplesPerSecond = 1604.6 +MPI Rank 0: 05/03/2016 18:06:19: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86595333 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.4826s; samplesPerSecond = 1326.3 +MPI Rank 0: 05/03/2016 18:06:19: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.81007105 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.4104s; samplesPerSecond = 1559.3 +MPI Rank 0: 05/03/2016 18:06:20: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73428214 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.7104s; samplesPerSecond = 900.9 +MPI Rank 0: 05/03/2016 18:06:21: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57475346 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.5254s; samplesPerSecond = 1218.0 +MPI Rank 0: 05/03/2016 18:06:21: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.43591998 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.3665s; samplesPerSecond = 1746.2 +MPI Rank 0: 05/03/2016 18:06:21: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36042336 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3177s; samplesPerSecond = 2014.5 +MPI Rank 0: 05/03/2016 18:06:22: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39384191 * 640; EvalErrorPrediction = 0.85156250 * 640; time = 0.5219s; samplesPerSecond = 1226.3 +MPI Rank 0: 05/03/2016 18:06:22: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25078527 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.4894s; samplesPerSecond = 1307.7 +MPI Rank 0: 05/03/2016 18:06:23: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35324790 * 640; EvalErrorPrediction = 0.79375000 * 640; time = 0.3969s; samplesPerSecond = 1612.6 +MPI Rank 0: 05/03/2016 18:06:23: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19606895 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.4721s; samplesPerSecond = 1355.7 +MPI Rank 0: 05/03/2016 18:06:24: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06110651 * 640; EvalErrorPrediction = 0.73125000 * 640; time = 0.4359s; samplesPerSecond = 1468.3 +MPI Rank 0: 05/03/2016 18:06:25: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05118547 * 640; EvalErrorPrediction = 0.75625000 * 640; time = 1.1126s; samplesPerSecond = 575.2 +MPI Rank 0: 05/03/2016 18:06:25: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02474034 * 640; EvalErrorPrediction = 0.74062500 * 640; time = 0.2828s; samplesPerSecond = 2262.9 +MPI Rank 0: 05/03/2016 18:06:25: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89903187 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.3493s; samplesPerSecond = 1832.2 +MPI Rank 0: 05/03/2016 18:06:26: Epoch[ 1 of 
3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.75173292 * 640; EvalErrorPrediction = 0.68125000 * 640; time = 0.5228s; samplesPerSecond = 1224.3 +MPI Rank 0: 05/03/2016 18:06:26: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83969107 * 640; EvalErrorPrediction = 0.71875000 * 640; time = 0.2957s; samplesPerSecond = 2164.5 +MPI Rank 0: 05/03/2016 18:06:26: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62870412 * 640; EvalErrorPrediction = 0.65468750 * 640; time = 0.2616s; samplesPerSecond = 2446.5 +MPI Rank 0: 05/03/2016 18:06:27: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.66655150 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.7614s; samplesPerSecond = 840.5 +MPI Rank 0: 05/03/2016 18:06:28: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61327720 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.7435s; samplesPerSecond = 860.7 +MPI Rank 0: 05/03/2016 18:06:28: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53100193 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.5536s; samplesPerSecond = 1156.0 +MPI Rank 0: 05/03/2016 18:06:29: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.43748447 * 640; EvalErrorPrediction = 0.64375000 * 640; time = 0.4649s; samplesPerSecond = 1376.7 +MPI Rank 0: 05/03/2016 18:06:29: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.41106807 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.5566s; samplesPerSecond = 1149.8 +MPI Rank 0: 05/03/2016 18:06:30: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48898886 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.3597s; samplesPerSecond = 1779.4 +MPI Rank 0: 05/03/2016 18:06:30: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34965819 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.3050s; samplesPerSecond = 2098.4 +MPI Rank 0: 05/03/2016 18:06:30: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.23707549 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.3494s; samplesPerSecond = 1831.5 +MPI Rank 0: 05/03/2016 18:06:31: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.33135317 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3540s; samplesPerSecond = 1807.9 +MPI Rank 0: 05/03/2016 18:06:31: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21606912 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.4567s; samplesPerSecond = 1401.3 +MPI Rank 0: 05/03/2016 18:06:32: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29109817 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.5107s; samplesPerSecond = 1253.1 +MPI Rank 0: 05/03/2016 18:06:33: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.20535403 * 640; EvalErrorPrediction = 0.57500000 * 640; time = 0.8977s; samplesPerSecond = 712.9 +MPI Rank 0: 05/03/2016 18:06:33: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01737292 * 20480; EvalErrorPrediction = 0.73061523 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=15.1579s +MPI Rank 0: 05/03/2016 18:06:33: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:16: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum 
= 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 18:06:33: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:16: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 05/03/2016 18:07:17: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.05711163 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 1.1413s; samplesPerSecond = 2243.0 -MPI Rank 0: 05/03/2016 18:07:18: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02925373 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 0.8840s; samplesPerSecond = 2895.8 -MPI Rank 0: 05/03/2016 18:07:18: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02826588 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.6818s; samplesPerSecond = 3755.0 -MPI Rank 0: 05/03/2016 18:07:19: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.97095883 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 1.0125s; samplesPerSecond = 2528.4 -MPI Rank 0: 05/03/2016 18:07:20: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.94550194 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 0.7589s; samplesPerSecond = 3373.2 -MPI Rank 0: 05/03/2016 18:07:21: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.01561798 * 2560; EvalErrorPrediction = 0.54414063 * 2560; time = 0.7444s; samplesPerSecond = 3438.9 -MPI Rank 0: 05/03/2016 18:07:22: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94069853 * 2560; EvalErrorPrediction = 0.52500000 * 2560; time = 0.9851s; samplesPerSecond = 2598.8 -MPI Rank 0: 05/03/2016 18:07:23: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.94857102 * 2560; EvalErrorPrediction = 0.54023438 * 2560; time = 0.7582s; samplesPerSecond = 3376.4 -MPI Rank 0: 05/03/2016 18:07:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.99199744 * 20480; EvalErrorPrediction = 0.54179687 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.02629s -MPI Rank 0: 05/03/2016 18:07:23: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn.2' +MPI Rank 0: 05/03/2016 18:06:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 0: 05/03/2016 18:06:34: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.05711163 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 1.0647s; samplesPerSecond = 2404.4 +MPI Rank 0: 05/03/2016 18:06:34: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02925373 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 0.5994s; samplesPerSecond = 4271.3 +MPI Rank 0: 05/03/2016 18:06:36: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02826588 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 1.4100s; samplesPerSecond = 1815.6 +MPI Rank 0: 05/03/2016 18:06:37: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.97095883 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.7196s; samplesPerSecond = 3557.4 +MPI Rank 0: 05/03/2016 18:06:37: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.94550194 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 0.8913s; samplesPerSecond = 2872.2 +MPI Rank 0: 05/03/2016 18:06:39: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.01561798 * 2560; EvalErrorPrediction = 0.54414063 * 2560; time = 1.0539s; samplesPerSecond = 2429.1 +MPI Rank 0: 05/03/2016 18:06:39: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94069853 * 2560; EvalErrorPrediction = 0.52500000 * 2560; time = 0.9446s; samplesPerSecond = 2710.0 +MPI Rank 0: 05/03/2016 18:06:40: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.94857102 * 2560; EvalErrorPrediction = 0.54023438 * 2560; time = 0.6444s; samplesPerSecond = 3973.0 +MPI Rank 0: 05/03/2016 18:06:40: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.99199744 * 20480; EvalErrorPrediction = 0.54179687 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.33854s +MPI Rank 0: 05/03/2016 18:06:40: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 18:06:40: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:23: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 18:07:25: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946156 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 1.9763s; samplesPerSecond = 5181.4 -MPI Rank 0: 05/03/2016 18:07:27: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066794 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 2.0670s; samplesPerSecond = 4954.0 -MPI Rank 0: 05/03/2016 18:07:27: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506475 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.06112s -MPI Rank 0: 05/03/2016 18:07:27: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 18:07:27: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 18:06:40: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 18:06:43: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946156 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 2.3382s; samplesPerSecond = 4379.5 +MPI Rank 0: 05/03/2016 18:06:45: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066794 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 2.4048s; samplesPerSecond = 4258.2 +MPI Rank 0: 05/03/2016 18:06:45: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506475 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.75905s +MPI Rank 0: 05/03/2016 18:06:45: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 18:06:45: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:27: Action "train" complete. +MPI Rank 0: 05/03/2016 18:06:45: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:27: __COMPLETED__ -MPI Rank 1: 05/03/2016 18:06:56: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 18:06:56: Build info: +MPI Rank 0: 05/03/2016 18:06:45: __COMPLETED__ +MPI Rank 1: 05/03/2016 18:06:15: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:06:15: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: Built time: May 3 2016 17:56:15 -MPI Rank 1: 05/03/2016 18:06:56: Last modified date: Tue May 3 11:36:22 2016 -MPI Rank 1: 05/03/2016 18:06:56: Build type: release -MPI Rank 1: 05/03/2016 18:06:56: Build target: GPU -MPI Rank 1: 05/03/2016 18:06:56: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 18:06:56: Math lib: acml -MPI Rank 1: 05/03/2016 18:06:56: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 1: 05/03/2016 18:06:56: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 1: 05/03/2016 18:06:56: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 1: 05/03/2016 18:06:56: Build Branch: HEAD -MPI Rank 1: 05/03/2016 18:06:56: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 1: 05/03/2016 18:06:56: Built by philly on 18750d26eb32 -MPI Rank 1: 05/03/2016 18:06:56: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 1: 05/03/2016 18:06:56: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:06:15: Built time: May 3 2016 17:56:15 +MPI Rank 1: 05/03/2016 18:06:15: Last modified date: Tue May 3 11:36:22 2016 +MPI Rank 1: 05/03/2016 18:06:15: Build type: release +MPI Rank 1: 05/03/2016 18:06:15: Build target: GPU +MPI Rank 1: 05/03/2016 18:06:15: With 1bit-SGD: no +MPI Rank 1: 05/03/2016 18:06:15: Math lib: acml +MPI Rank 1: 05/03/2016 18:06:15: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 1: 05/03/2016 18:06:15: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 1: 05/03/2016 18:06:15: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 1: 05/03/2016 18:06:15: Build Branch: HEAD +MPI Rank 1: 05/03/2016 18:06:15: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 1: 05/03/2016 18:06:15: Built by philly on 18750d26eb32 +MPI Rank 1: 05/03/2016 18:06:15: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 1: 05/03/2016 18:06:15: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: Running on localhost at 2016/05/03 18:06:56 -MPI Rank 1: 05/03/2016 18:06:56: Command line: -MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 
1: 05/03/2016 18:06:15: Running on localhost at 2016/05/03 18:06:15 +MPI Rank 1: 05/03/2016 18:06:15: Command line: +MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:06:56: precision = "float" +MPI Rank 1: 05/03/2016 18:06:15: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:06:15: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -759,27 +759,25 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=1 -MPI Rank 1: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:06:15: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:06:56: precision = "float" +MPI Rank 1: 05/03/2016 18:06:15: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:06:15: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = -1 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -863,33 +861,31 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=1 -MPI Rank 1: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:06:15: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:06:15: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN +MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. MPI Rank 1: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:deviceId=-1 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=float -MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -971,35 +967,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 1: ] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 1: configparameters: 
cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 18:06:56: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 18:06:56: Commands: speechTrain -MPI Rank 1: 05/03/2016 18:06:56: Precision = "float" -MPI Rank 1: 05/03/2016 18:06:56: Using 1 CPU threads. -MPI Rank 1: 05/03/2016 18:06:56: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 18:06:56: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 1: 05/03/2016 18:06:56: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 1: 05/03/2016 18:06:15: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:06:15: Commands: speechTrain +MPI Rank 1: 05/03/2016 18:06:15: Precision = "float" +MPI Rank 1: 05/03/2016 18:06:15: Using 1 CPU threads. +MPI Rank 1: 05/03/2016 18:06:15: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 18:06:15: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 1: 05/03/2016 18:06:15: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: ############################################################################## -MPI Rank 1: 05/03/2016 18:06:56: # # -MPI Rank 1: 05/03/2016 18:06:56: # Action "train" # -MPI Rank 1: 05/03/2016 18:06:56: # # -MPI Rank 1: 05/03/2016 18:06:56: ############################################################################## +MPI Rank 1: 05/03/2016 18:06:15: ############################################################################## +MPI Rank 1: 05/03/2016 18:06:15: # # +MPI Rank 1: 05/03/2016 18:06:15: # Action "train" # +MPI Rank 1: 05/03/2016 18:06:15: # # +MPI Rank 1: 05/03/2016 18:06:15: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 18:06:15: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 1: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: Creating virgin network. +MPI Rank 1: 05/03/2016 18:06:15: Creating virgin network. MPI Rank 1: MPI Rank 1: Post-processing network... 
MPI Rank 1: @@ -1051,14 +1046,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: Created model with 25 nodes on CPU. +MPI Rank 1: 05/03/2016 18:06:16: Created model with 25 nodes on CPU. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: Training criterion node(s): -MPI Rank 1: 05/03/2016 18:06:56: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 18:06:16: Training criterion node(s): +MPI Rank 1: 05/03/2016 18:06:16: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 18:06:16: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 18:06:16: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1066,132 +1061,137 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 0x25a5c68: {[B1 Value[512 x 1]] } -MPI Rank 1: 0x25b9398: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 0x25ca778: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 0x25ca938: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 0x25caaf8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 0x25cf1d8: {[features Value[363 x *]] } -MPI Rank 1: 0x25d0a28: {[W1 Value[512 x 512]] } -MPI Rank 1: 0x25df128: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 0x25df338: {[W0*features Value[512 x *]] } -MPI Rank 1: 0x25df4f8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 0x25df6f8: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 0x25df858: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 0x25dfa18: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 0x2635a38: {[W0 Value[512 x 363]] } -MPI Rank 1: 0x2646a18: {[Prior Value[132]] } -MPI Rank 1: 0x267b058: {[LogOfPrior Value[132]] } -MPI Rank 1: 0x267f208: {[B0 Value[512 x 1]] } -MPI Rank 1: 0x268c518: {[W2 Value[132 x 512]] } -MPI Rank 1: 0x2693088: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 0x2693248: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 0x2693408: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 0x26935c8: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 0x26a6b78: {[labels Value[132 x *]] } -MPI Rank 1: 0x26a7a78: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 0x26cbaa8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 0x26cbc68: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 0x26cbe28: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 0x26cefc8: {[B2 Value[132 x 1]] } +MPI Rank 1: 0x17c5fc8: {[B2 Value[132 x 1]] } +MPI Rank 1: 0x17c6a58: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 0x17c7398: {[W0 Value[512 x 363]] } +MPI Rank 1: 0x17c7548: {[features Value[363 x *]] } +MPI Rank 1: 0x17dc828: 
{[B1 Value[512 x 1]] } +MPI Rank 1: 0x17e0eb8: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 0x1871308: {[Prior Value[132]] } +MPI Rank 1: 0x18743d8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 0x1874598: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 0x1874758: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 0x1874918: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 0x1874ad8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 0x1874d98: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 0x18a3c68: {[W1 Value[512 x 512]] } +MPI Rank 1: 0x18baa58: {[W2 Value[132 x 512]] } +MPI Rank 1: 0x18cf508: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 0x18cf668: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 0x18da2c8: {[B0 Value[512 x 1]] } +MPI Rank 1: 0x18e1e48: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 0x18e2008: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 0x18e21c8: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 0x18e2388: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 0x18ffdb8: {[labels Value[132 x *]] } +MPI Rank 1: 0x18ffff8: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 0x19003c8: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 0x1900528: {[W0*features Value[512 x *]] } +MPI Rank 1: 0x19005c8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 0x1900678: {[LogOfPrior Value[132]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 18:06:16: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:56: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 18:06:56: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 18:06:56: Prior = Mean() +MPI Rank 1: 05/03/2016 18:06:16: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 18:06:16: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 18:06:16: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:58: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 18:06:17: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:59: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 18:06:18: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:06:59: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 1: 05/03/2016 18:06:59: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.39181891 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.8465s; samplesPerSecond = 756.1 -MPI Rank 1: 05/03/2016 18:07:00: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.16675421 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.4517s; samplesPerSecond = 1416.8 -MPI Rank 1: 05/03/2016 18:07:00: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98684059 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3105s; samplesPerSecond = 2061.1 -MPI Rank 1: 05/03/2016 18:07:01: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86595333 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.4235s; samplesPerSecond = 1511.3 -MPI Rank 1: 05/03/2016 18:07:01: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.81007105 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.4066s; samplesPerSecond = 1573.9 -MPI Rank 1: 05/03/2016 18:07:02: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73428214 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.6490s; samplesPerSecond = 986.2 -MPI Rank 1: 05/03/2016 18:07:02: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57475346 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.5054s; samplesPerSecond = 1266.3 -MPI Rank 1: 05/03/2016 18:07:03: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.43591998 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.2985s; samplesPerSecond = 2144.1 -MPI Rank 1: 05/03/2016 18:07:03: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36042336 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3538s; samplesPerSecond = 1808.7 -MPI Rank 1: 05/03/2016 18:07:03: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39384191 * 640; EvalErrorPrediction = 0.85156250 * 640; time = 0.4080s; samplesPerSecond = 1568.8 -MPI Rank 1: 05/03/2016 18:07:04: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25078527 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.4845s; samplesPerSecond = 1320.9 -MPI Rank 1: 05/03/2016 18:07:04: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35324790 * 640; EvalErrorPrediction = 0.79375000 * 640; time = 0.2915s; samplesPerSecond = 2195.8 -MPI Rank 1: 05/03/2016 18:07:04: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19606895 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.2754s; samplesPerSecond = 2323.7 -MPI Rank 1: 05/03/2016 18:07:05: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06110651 * 640; EvalErrorPrediction = 0.73125000 * 640; time = 0.4522s; samplesPerSecond = 1415.2 -MPI Rank 1: 05/03/2016 18:07:05: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05118547 * 640; EvalErrorPrediction = 0.75625000 * 640; time = 0.3141s; samplesPerSecond = 2037.7 -MPI Rank 1: 05/03/2016 18:07:06: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02474034 * 640; EvalErrorPrediction = 0.74062500 * 640; time = 0.4139s; samplesPerSecond = 1546.1 -MPI Rank 1: 05/03/2016 18:07:06: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89903187 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.3326s; samplesPerSecond = 1924.2 -MPI Rank 1: 05/03/2016 18:07:06: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.75173292 * 640; EvalErrorPrediction = 0.68125000 * 640; time = 0.4650s; samplesPerSecond = 1376.3 -MPI Rank 1: 05/03/2016 18:07:07: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83969107 * 640; EvalErrorPrediction = 0.71875000 * 640; time = 0.4642s; samplesPerSecond = 1378.7 -MPI Rank 1: 05/03/2016 18:07:07: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62870412 * 640; EvalErrorPrediction = 0.65468750 * 640; time = 0.6043s; samplesPerSecond = 1059.0 -MPI Rank 1: 05/03/2016 18:07:08: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.66655150 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 1.0798s; samplesPerSecond = 592.7 -MPI Rank 1: 05/03/2016 18:07:09: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61327720 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.5760s; samplesPerSecond = 1111.0 -MPI Rank 1: 05/03/2016 18:07:10: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53100193 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.5765s; samplesPerSecond = 1110.2 -MPI Rank 1: 05/03/2016 18:07:10: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.43748447 * 640; EvalErrorPrediction = 0.64375000 * 640; time = 0.6977s; samplesPerSecond = 917.3 -MPI Rank 1: 05/03/2016 18:07:11: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.41106807 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 1.0501s; samplesPerSecond = 609.5 -MPI Rank 1: 05/03/2016 18:07:12: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48898886 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.7258s; samplesPerSecond = 881.8 -MPI Rank 1: 05/03/2016 18:07:13: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34965819 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.6569s; samplesPerSecond = 974.3 -MPI Rank 1: 05/03/2016 18:07:13: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.23707549 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.3435s; samplesPerSecond = 1863.2 -MPI Rank 1: 05/03/2016 18:07:14: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.33135317 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.9141s; samplesPerSecond = 700.1 -MPI Rank 1: 05/03/2016 18:07:15: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21606912 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.9670s; samplesPerSecond = 661.9 -MPI Rank 1: 05/03/2016 18:07:15: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29109817 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.3979s; samplesPerSecond = 1608.5 -MPI Rank 1: 05/03/2016 18:07:16: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.20535403 * 640; EvalErrorPrediction = 0.57500000 * 640; time = 0.3136s; samplesPerSecond = 2040.9 -MPI Rank 1: 05/03/2016 18:07:16: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01737292 * 20480; EvalErrorPrediction = 0.73061523 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=17.1611s +MPI Rank 1: 05/03/2016 18:06:18: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 18:06:18: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.39181891 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.2685s; samplesPerSecond = 2384.0 +MPI Rank 1: 05/03/2016 18:06:18: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.16675421 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.1976s; samplesPerSecond = 3238.8 +MPI Rank 1: 05/03/2016 18:06:18: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98684059 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3865s; samplesPerSecond = 1656.0 +MPI Rank 1: 05/03/2016 18:06:19: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86595333 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.4949s; samplesPerSecond = 1293.1 +MPI Rank 1: 05/03/2016 18:06:19: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.81007105 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.4003s; samplesPerSecond = 1598.7 +MPI Rank 1: 05/03/2016 18:06:20: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73428214 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.7204s; samplesPerSecond = 888.4 +MPI Rank 1: 05/03/2016 18:06:21: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57475346 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.5263s; samplesPerSecond = 1216.0 +MPI Rank 1: 05/03/2016 18:06:21: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.43591998 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.3657s; samplesPerSecond = 1750.1 +MPI Rank 1: 05/03/2016 18:06:21: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36042336 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3245s; samplesPerSecond = 1972.3 +MPI Rank 1: 05/03/2016 18:06:22: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39384191 * 640; EvalErrorPrediction = 0.85156250 * 640; time = 0.5221s; samplesPerSecond = 1225.9 +MPI Rank 1: 05/03/2016 18:06:22: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25078527 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.4823s; samplesPerSecond = 1326.9 +MPI Rank 1: 05/03/2016 18:06:23: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35324790 * 640; EvalErrorPrediction = 0.79375000 * 640; time = 0.3969s; samplesPerSecond = 1612.7 +MPI Rank 1: 05/03/2016 18:06:23: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19606895 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.4654s; samplesPerSecond = 1375.1 +MPI Rank 1: 05/03/2016 18:06:24: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06110651 * 640; EvalErrorPrediction = 0.73125000 * 640; time = 0.4357s; samplesPerSecond = 1468.9 +MPI Rank 1: 05/03/2016 18:06:25: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05118547 * 640; EvalErrorPrediction = 0.75625000 * 640; time = 1.1194s; samplesPerSecond = 571.7 +MPI Rank 1: 05/03/2016 18:06:25: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02474034 * 640; EvalErrorPrediction = 0.74062500 * 640; time = 0.2827s; samplesPerSecond = 2263.6 +MPI Rank 1: 05/03/2016 18:06:25: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89903187 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.3361s; samplesPerSecond = 1904.0 +MPI Rank 1: 05/03/2016 18:06:26: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.75173292 * 640; EvalErrorPrediction = 0.68125000 * 640; time = 0.5212s; samplesPerSecond = 1227.9 +MPI Rank 1: 05/03/2016 18:06:26: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83969107 * 640; EvalErrorPrediction = 0.71875000 * 640; time = 0.3101s; samplesPerSecond = 2063.8 +MPI Rank 1: 05/03/2016 18:06:26: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62870412 * 640; EvalErrorPrediction = 0.65468750 * 640; time = 0.2504s; samplesPerSecond = 2555.8 +MPI Rank 1: 05/03/2016 18:06:27: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.66655150 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.7725s; samplesPerSecond = 828.5 +MPI Rank 1: 05/03/2016 18:06:28: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61327720 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.7435s; samplesPerSecond = 860.8 +MPI Rank 1: 05/03/2016 18:06:28: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53100193 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.5494s; samplesPerSecond = 1165.0 +MPI Rank 1: 05/03/2016 18:06:29: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.43748447 * 640; EvalErrorPrediction = 0.64375000 * 640; time = 0.4834s; samplesPerSecond = 1323.9 +MPI Rank 1: 05/03/2016 18:06:29: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.41106807 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.5492s; samplesPerSecond = 1165.3 +MPI Rank 1: 05/03/2016 18:06:30: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48898886 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.3521s; samplesPerSecond = 1817.7 +MPI Rank 1: 05/03/2016 18:06:30: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34965819 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.3011s; samplesPerSecond = 2125.8 +MPI Rank 1: 05/03/2016 18:06:30: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.23707549 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.3536s; samplesPerSecond = 1809.7 +MPI Rank 1: 05/03/2016 18:06:31: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.33135317 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3498s; samplesPerSecond = 1829.4 +MPI Rank 1: 05/03/2016 18:06:31: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21606912 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.4612s; samplesPerSecond = 1387.8 +MPI Rank 1: 05/03/2016 18:06:32: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29109817 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.5176s; samplesPerSecond = 1236.5 +MPI Rank 1: 05/03/2016 18:06:33: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.20535403 * 640; EvalErrorPrediction = 0.57500000 * 640; time = 0.8944s; samplesPerSecond = 715.6 +MPI Rank 1: 05/03/2016 18:06:33: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01737292 * 20480; EvalErrorPrediction = 0.73061523 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=15.1578s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:16: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 18:06:33: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 
samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:16: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 18:07:17: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.05711163 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 1.1383s; samplesPerSecond = 2249.0 -MPI Rank 1: 05/03/2016 18:07:18: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02925373 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 0.8159s; samplesPerSecond = 3137.5 -MPI Rank 1: 05/03/2016 18:07:18: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02826588 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.7395s; samplesPerSecond = 3461.9 -MPI Rank 1: 05/03/2016 18:07:19: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.97095883 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 1.0223s; samplesPerSecond = 2504.1 -MPI Rank 1: 05/03/2016 18:07:20: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.94550194 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 0.7687s; samplesPerSecond = 3330.2 -MPI Rank 1: 05/03/2016 18:07:21: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.01561798 * 2560; EvalErrorPrediction = 0.54414063 * 2560; time = 0.7306s; samplesPerSecond = 3504.1 -MPI Rank 1: 05/03/2016 18:07:22: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94069853 * 2560; EvalErrorPrediction = 0.52500000 * 2560; time = 0.9915s; samplesPerSecond = 2582.0 -MPI Rank 1: 05/03/2016 18:07:23: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.94857102 * 2560; EvalErrorPrediction = 0.54023438 * 2560; time = 0.7642s; samplesPerSecond = 3349.8 -MPI Rank 1: 05/03/2016 18:07:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.99199744 * 20480; EvalErrorPrediction = 0.54179687 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.0163s +MPI Rank 1: 05/03/2016 18:06:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 18:06:34: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.05711163 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 1.0709s; samplesPerSecond = 2390.6 +MPI Rank 1: 05/03/2016 18:06:34: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02925373 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 0.5932s; samplesPerSecond = 4315.5 +MPI Rank 1: 05/03/2016 18:06:36: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02826588 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 1.4161s; samplesPerSecond = 1807.8 +MPI Rank 1: 05/03/2016 18:06:37: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.97095883 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.7195s; samplesPerSecond = 3558.0 +MPI Rank 1: 05/03/2016 18:06:37: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.94550194 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 0.8813s; samplesPerSecond = 2904.7 +MPI Rank 1: 05/03/2016 18:06:39: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.01561798 * 2560; EvalErrorPrediction = 0.54414063 * 2560; time = 1.0777s; samplesPerSecond = 2375.4 +MPI Rank 1: 05/03/2016 18:06:40: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94069853 * 2560; EvalErrorPrediction = 0.52500000 * 2560; time = 0.9372s; samplesPerSecond = 2731.6 +MPI Rank 1: 05/03/2016 18:06:40: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.94857102 * 2560; EvalErrorPrediction = 0.54023438 * 2560; time = 0.6304s; samplesPerSecond = 4061.1 +MPI Rank 1: 05/03/2016 18:06:40: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.99199744 * 20480; EvalErrorPrediction = 0.54179687 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.34334s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 18:06:40: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:23: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 18:07:25: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946156 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 1.9664s; samplesPerSecond = 5207.6 -MPI Rank 1: 05/03/2016 18:07:27: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066794 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 2.0769s; samplesPerSecond = 4930.4 -MPI Rank 1: 05/03/2016 18:07:27: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506475 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.06106s -MPI Rank 1: 05/03/2016 18:07:27: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 18:06:40: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 18:06:43: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946156 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 2.3349s; samplesPerSecond = 4385.5 +MPI Rank 1: 05/03/2016 18:06:45: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066794 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 2.4079s; samplesPerSecond = 4252.6 +MPI Rank 1: 05/03/2016 18:06:45: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506475 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.75899s +MPI Rank 1: 05/03/2016 18:06:45: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:27: Action "train" complete. +MPI Rank 1: 05/03/2016 18:06:45: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:27: __COMPLETED__ -MPI Rank 2: 05/03/2016 18:06:56: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 18:06:56: Build info: +MPI Rank 1: 05/03/2016 18:06:45: __COMPLETED__ +MPI Rank 2: 05/03/2016 18:06:16: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:06:16: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: Built time: May 3 2016 17:56:15 -MPI Rank 2: 05/03/2016 18:06:56: Last modified date: Tue May 3 11:36:22 2016 -MPI Rank 2: 05/03/2016 18:06:56: Build type: release -MPI Rank 2: 05/03/2016 18:06:56: Build target: GPU -MPI Rank 2: 05/03/2016 18:06:56: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 18:06:56: Math lib: acml -MPI Rank 2: 05/03/2016 18:06:56: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 2: 05/03/2016 18:06:56: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 2: 05/03/2016 18:06:56: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 2: 05/03/2016 18:06:56: Build Branch: HEAD -MPI Rank 2: 05/03/2016 18:06:56: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 2: 05/03/2016 18:06:56: Built by philly on 18750d26eb32 -MPI Rank 2: 05/03/2016 18:06:56: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 2: 05/03/2016 18:06:56: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:06:16: Built time: May 3 2016 17:56:15 +MPI Rank 2: 05/03/2016 18:06:16: Last modified date: Tue May 3 11:36:22 2016 +MPI Rank 2: 05/03/2016 18:06:16: Build type: release +MPI Rank 2: 05/03/2016 18:06:16: Build target: GPU +MPI Rank 2: 05/03/2016 18:06:16: With 1bit-SGD: no +MPI Rank 2: 05/03/2016 18:06:16: Math lib: acml +MPI Rank 2: 05/03/2016 18:06:16: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 2: 05/03/2016 18:06:16: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 2: 05/03/2016 18:06:16: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 2: 05/03/2016 18:06:16: Build Branch: HEAD +MPI Rank 2: 05/03/2016 18:06:16: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 2: 05/03/2016 18:06:16: Built by philly on 18750d26eb32 +MPI Rank 2: 05/03/2016 18:06:16: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 2: 05/03/2016 18:06:16: ------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: Running on localhost at 2016/05/03 18:06:56 -MPI Rank 2: 05/03/2016 18:06:56: Command line: -MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk 
configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 2: 05/03/2016 18:06:16: Running on localhost at 2016/05/03 18:06:16 +MPI Rank 2: 05/03/2016 18:06:16: Command line: +MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:06:56: precision = "float" +MPI Rank 2: 05/03/2016 18:06:16: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:06:16: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1281,27 +1281,25 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=1 -MPI Rank 2: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:06:16: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:06:56: precision = "float" +MPI Rank 2: 05/03/2016 18:06:16: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:06:16: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = -1 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1385,33 +1383,31 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=1 -MPI Rank 2: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:06:16: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:06:16: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN +MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. MPI Rank 2: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:deviceId=-1 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=float -MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1493,35 +1489,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 2: ] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 2: configparameters: 
cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 18:06:56: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 18:06:56: Commands: speechTrain -MPI Rank 2: 05/03/2016 18:06:56: Precision = "float" -MPI Rank 2: 05/03/2016 18:06:56: Using 1 CPU threads. -MPI Rank 2: 05/03/2016 18:06:56: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 18:06:56: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 2: 05/03/2016 18:06:56: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 2: 05/03/2016 18:06:16: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:06:16: Commands: speechTrain +MPI Rank 2: 05/03/2016 18:06:16: Precision = "float" +MPI Rank 2: 05/03/2016 18:06:16: Using 1 CPU threads. +MPI Rank 2: 05/03/2016 18:06:16: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 18:06:16: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 2: 05/03/2016 18:06:16: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: ############################################################################## -MPI Rank 2: 05/03/2016 18:06:56: # # -MPI Rank 2: 05/03/2016 18:06:56: # Action "train" # -MPI Rank 2: 05/03/2016 18:06:56: # # -MPI Rank 2: 05/03/2016 18:06:56: ############################################################################## +MPI Rank 2: 05/03/2016 18:06:16: ############################################################################## +MPI Rank 2: 05/03/2016 18:06:16: # # +MPI Rank 2: 05/03/2016 18:06:16: # Action "train" # +MPI Rank 2: 05/03/2016 18:06:16: # # +MPI Rank 2: 05/03/2016 18:06:16: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 18:06:16: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 2: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: Creating virgin network. +MPI Rank 2: 05/03/2016 18:06:16: Creating virgin network. MPI Rank 2: MPI Rank 2: Post-processing network... 
MPI Rank 2: @@ -1573,14 +1568,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: Created model with 25 nodes on CPU. +MPI Rank 2: 05/03/2016 18:06:16: Created model with 25 nodes on CPU. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: Training criterion node(s): -MPI Rank 2: 05/03/2016 18:06:56: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 18:06:16: Training criterion node(s): +MPI Rank 2: 05/03/2016 18:06:16: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 18:06:16: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 18:06:16: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1588,103 +1583,108 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0x16195f8: {[B0 Value[512 x 1]] } -MPI Rank 2: 0x1637a98: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 0x1641c18: {[W0*features Value[512 x *]] } -MPI Rank 2: 0x1641ff8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 0x16436f8: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 0x16437f8: {[LogOfPrior Value[132]] } -MPI Rank 2: 0x16476a8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 0x164d258: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 0x164d418: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 0x164d5d8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 0x1650c58: {[features Value[363 x *]] } -MPI Rank 2: 0x1666cf8: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 0x1666eb8: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 0x16ce0f8: {[W1 Value[512 x 512]] } -MPI Rank 2: 0x16fda38: {[W0 Value[512 x 363]] } -MPI Rank 2: 0x1702218: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 0x17023d8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 0x17029d8: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 0x1702b98: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 0x1716958: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 0x1748f58: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 0x1749228: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 0x1749e28: {[Prior Value[132]] } -MPI Rank 2: 0x1759208: {[W2 Value[132 x 512]] } -MPI Rank 2: 0x17596a8: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 0x17599e8: {[B1 Value[512 x 1]] } -MPI Rank 2: 0x176a888: {[labels Value[132 x *]] } -MPI Rank 2: 0x176c7e8: {[B2 Value[132 x 1]] } +MPI Rank 2: 0x2555908: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 0x2555c48: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 0x256a5f8: {[labels Value[132 x *]] } +MPI Rank 2: 0x256bd48: {[B2 Value[132 x 1]] } +MPI Rank 2: 
0x256da98: {[W0 Value[512 x 363]] } +MPI Rank 2: 0x25703a8: {[features Value[363 x *]] } +MPI Rank 2: 0x25c1738: {[B1 Value[512 x 1]] } +MPI Rank 2: 0x25e3eb8: {[W0*features Value[512 x *]] } +MPI Rank 2: 0x25e40c8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 0x25e4288: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 0x25e4448: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 0x25e4608: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 0x25e47c8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 0x261b718: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 0x261b8d8: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 0x2679338: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 0x2685028: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 0x2685188: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 0x268d658: {[Prior Value[132]] } +MPI Rank 2: 0x268e2c8: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 0x268e488: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 0x268ef78: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 0x268f4c8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 0x268f5c8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 0x268f678: {[LogOfPrior Value[132]] } +MPI Rank 2: 0x2691c38: {[W2 Value[132 x 512]] } +MPI Rank 2: 0x2694418: {[B0 Value[512 x 1]] } +MPI Rank 2: 0x26944b8: {[W1 Value[512 x 512]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 18:06:16: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:56: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 18:06:56: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 18:06:56: Prior = Mean() +MPI Rank 2: 05/03/2016 18:06:16: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 18:06:16: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 18:06:16: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:58: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 18:06:17: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:59: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 18:06:18: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:06:59: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 2: 05/03/2016 18:06:59: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.39181891 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.8622s; samplesPerSecond = 742.3 -MPI Rank 2: 05/03/2016 18:07:00: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.16675421 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.4466s; samplesPerSecond = 1433.2 -MPI Rank 2: 05/03/2016 18:07:00: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98684059 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3104s; samplesPerSecond = 2061.8 -MPI Rank 2: 05/03/2016 18:07:01: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86595333 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.4235s; samplesPerSecond = 1511.2 -MPI Rank 2: 05/03/2016 18:07:01: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.81007105 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.4066s; samplesPerSecond = 1574.2 -MPI Rank 2: 05/03/2016 18:07:02: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73428214 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.5783s; samplesPerSecond = 1106.7 -MPI Rank 2: 05/03/2016 18:07:02: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57475346 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.5628s; samplesPerSecond = 1137.1 -MPI Rank 2: 05/03/2016 18:07:03: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.43591998 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.2968s; samplesPerSecond = 2156.5 -MPI Rank 2: 05/03/2016 18:07:03: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36042336 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3532s; samplesPerSecond = 1812.1 -MPI Rank 2: 05/03/2016 18:07:03: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39384191 * 640; EvalErrorPrediction = 0.85156250 * 640; time = 0.4110s; samplesPerSecond = 1557.2 -MPI Rank 2: 05/03/2016 18:07:04: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25078527 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.4718s; samplesPerSecond = 1356.5 -MPI Rank 2: 05/03/2016 18:07:04: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35324790 * 640; EvalErrorPrediction = 0.79375000 * 640; time = 0.2906s; samplesPerSecond = 2202.3 -MPI Rank 2: 05/03/2016 18:07:04: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19606895 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.2818s; samplesPerSecond = 2270.9 -MPI Rank 2: 05/03/2016 18:07:05: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06110651 * 640; EvalErrorPrediction = 0.73125000 * 640; time = 0.4446s; samplesPerSecond = 1439.3 -MPI Rank 2: 05/03/2016 18:07:05: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05118547 * 640; EvalErrorPrediction = 0.75625000 * 640; time = 0.3155s; samplesPerSecond = 2028.8 -MPI Rank 2: 05/03/2016 18:07:05: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02474034 * 640; EvalErrorPrediction = 0.74062500 * 640; time = 0.4134s; samplesPerSecond = 1548.2 -MPI Rank 2: 05/03/2016 18:07:06: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89903187 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.3451s; samplesPerSecond = 1854.7 -MPI Rank 2: 05/03/2016 18:07:06: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.75173292 * 640; EvalErrorPrediction = 0.68125000 * 640; time = 0.4548s; samplesPerSecond = 1407.3 -MPI Rank 2: 05/03/2016 18:07:07: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83969107 * 640; EvalErrorPrediction = 0.71875000 * 640; time = 0.4513s; samplesPerSecond = 1418.0 -MPI Rank 2: 05/03/2016 18:07:07: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62870412 * 640; EvalErrorPrediction = 0.65468750 * 640; time = 0.6294s; samplesPerSecond = 1016.8 -MPI Rank 2: 05/03/2016 18:07:08: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.66655150 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 1.0040s; samplesPerSecond = 637.5 -MPI Rank 2: 05/03/2016 18:07:09: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61327720 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.6414s; samplesPerSecond = 997.7 -MPI Rank 2: 05/03/2016 18:07:10: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53100193 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.5732s; samplesPerSecond = 1116.5 -MPI Rank 2: 05/03/2016 18:07:10: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.43748447 * 640; EvalErrorPrediction = 0.64375000 * 640; time = 0.6992s; samplesPerSecond = 915.3 -MPI Rank 2: 05/03/2016 18:07:11: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.41106807 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 1.1236s; samplesPerSecond = 569.6 -MPI Rank 2: 05/03/2016 18:07:12: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48898886 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.6516s; samplesPerSecond = 982.2 -MPI Rank 2: 05/03/2016 18:07:13: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34965819 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.6576s; samplesPerSecond = 973.3 -MPI Rank 2: 05/03/2016 18:07:13: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.23707549 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.3433s; samplesPerSecond = 1864.1 -MPI Rank 2: 05/03/2016 18:07:14: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.33135317 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.9910s; samplesPerSecond = 645.8 -MPI Rank 2: 05/03/2016 18:07:15: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21606912 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.8889s; samplesPerSecond = 720.0 -MPI Rank 2: 05/03/2016 18:07:15: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29109817 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.4037s; samplesPerSecond = 1585.5 -MPI Rank 2: 05/03/2016 18:07:16: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.20535403 * 640; EvalErrorPrediction = 0.57500000 * 640; time = 0.3154s; samplesPerSecond = 2029.0 -MPI Rank 2: 05/03/2016 18:07:16: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01737292 * 20480; EvalErrorPrediction = 0.73061523 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=17.1611s +MPI Rank 2: 05/03/2016 18:06:18: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 18:06:18: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.39181891 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.2686s; samplesPerSecond = 2382.9 +MPI Rank 2: 05/03/2016 18:06:18: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.16675421 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.1976s; samplesPerSecond = 3238.1 +MPI Rank 2: 05/03/2016 18:06:18: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98684059 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.4059s; samplesPerSecond = 1576.7 +MPI Rank 2: 05/03/2016 18:06:19: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86595333 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.4661s; samplesPerSecond = 1373.2 +MPI Rank 2: 05/03/2016 18:06:19: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.81007105 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.4198s; samplesPerSecond = 1524.6 +MPI Rank 2: 05/03/2016 18:06:20: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73428214 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.7104s; samplesPerSecond = 900.9 +MPI Rank 2: 05/03/2016 18:06:21: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57475346 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.5187s; samplesPerSecond = 1233.9 +MPI Rank 2: 05/03/2016 18:06:21: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.43591998 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.3681s; samplesPerSecond = 1738.5 +MPI Rank 2: 05/03/2016 18:06:21: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36042336 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3229s; samplesPerSecond = 1982.0 +MPI Rank 2: 05/03/2016 18:06:22: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39384191 * 640; EvalErrorPrediction = 0.85156250 * 640; time = 0.4975s; samplesPerSecond = 1286.5 +MPI Rank 2: 05/03/2016 18:06:22: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25078527 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.5045s; samplesPerSecond = 1268.6 +MPI Rank 2: 05/03/2016 18:06:23: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35324790 * 640; EvalErrorPrediction = 0.79375000 * 640; time = 0.3969s; samplesPerSecond = 1612.6 +MPI Rank 2: 05/03/2016 18:06:23: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19606895 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.4634s; samplesPerSecond = 1381.0 +MPI Rank 2: 05/03/2016 18:06:24: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06110651 * 640; EvalErrorPrediction = 0.73125000 * 640; time = 0.4470s; samplesPerSecond = 1431.8 +MPI Rank 2: 05/03/2016 18:06:25: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05118547 * 640; EvalErrorPrediction = 0.75625000 * 640; time = 1.1102s; samplesPerSecond = 576.5 +MPI Rank 2: 05/03/2016 18:06:25: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02474034 * 640; EvalErrorPrediction = 0.74062500 * 640; time = 0.2978s; samplesPerSecond = 2149.0 +MPI Rank 2: 05/03/2016 18:06:25: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89903187 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.3435s; samplesPerSecond = 1863.2 +MPI Rank 2: 05/03/2016 18:06:26: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.75173292 * 640; EvalErrorPrediction = 0.68125000 * 640; time = 0.5227s; samplesPerSecond = 1224.3 +MPI Rank 2: 05/03/2016 18:06:26: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83969107 * 640; EvalErrorPrediction = 0.71875000 * 640; time = 0.2955s; samplesPerSecond = 2165.6 +MPI Rank 2: 05/03/2016 18:06:26: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62870412 * 640; EvalErrorPrediction = 0.65468750 * 640; time = 0.2553s; samplesPerSecond = 2506.9 +MPI Rank 2: 05/03/2016 18:06:27: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.66655150 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.7614s; samplesPerSecond = 840.6 +MPI Rank 2: 05/03/2016 18:06:28: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61327720 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.7435s; samplesPerSecond = 860.7 +MPI Rank 2: 05/03/2016 18:06:28: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53100193 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.5666s; samplesPerSecond = 1129.5 +MPI Rank 2: 05/03/2016 18:06:29: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.43748447 * 640; EvalErrorPrediction = 0.64375000 * 640; time = 0.4588s; samplesPerSecond = 1395.0 +MPI Rank 2: 05/03/2016 18:06:29: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.41106807 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.5634s; samplesPerSecond = 1136.0 +MPI Rank 2: 05/03/2016 18:06:30: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48898886 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.3528s; samplesPerSecond = 1814.0 +MPI Rank 2: 05/03/2016 18:06:30: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34965819 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.2983s; samplesPerSecond = 2145.3 +MPI Rank 2: 05/03/2016 18:06:30: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.23707549 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.3560s; samplesPerSecond = 1797.5 +MPI Rank 2: 05/03/2016 18:06:31: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.33135317 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3426s; samplesPerSecond = 1868.1 +MPI Rank 2: 05/03/2016 18:06:31: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21606912 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.4546s; samplesPerSecond = 1407.7 +MPI Rank 2: 05/03/2016 18:06:32: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29109817 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.5149s; samplesPerSecond = 1243.1 +MPI Rank 2: 05/03/2016 18:06:33: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.20535403 * 640; EvalErrorPrediction = 0.57500000 * 640; time = 0.9070s; samplesPerSecond = 705.6 +MPI Rank 2: 05/03/2016 18:06:33: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01737292 * 20480; EvalErrorPrediction = 0.73061523 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=15.1578s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:16: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 18:06:33: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 
samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:16: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 18:07:17: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.05711163 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 1.1413s; samplesPerSecond = 2243.1 -MPI Rank 2: 05/03/2016 18:07:18: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02925373 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 0.8740s; samplesPerSecond = 2929.0 -MPI Rank 2: 05/03/2016 18:07:18: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02826588 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.6918s; samplesPerSecond = 3700.5 -MPI Rank 2: 05/03/2016 18:07:19: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.97095883 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 1.0124s; samplesPerSecond = 2528.7 -MPI Rank 2: 05/03/2016 18:07:20: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.94550194 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 0.7589s; samplesPerSecond = 3373.2 -MPI Rank 2: 05/03/2016 18:07:21: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.01561798 * 2560; EvalErrorPrediction = 0.54414063 * 2560; time = 0.7435s; samplesPerSecond = 3443.4 -MPI Rank 2: 05/03/2016 18:07:22: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94069853 * 2560; EvalErrorPrediction = 0.52500000 * 2560; time = 0.9858s; samplesPerSecond = 2596.9 -MPI Rank 2: 05/03/2016 18:07:23: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.94857102 * 2560; EvalErrorPrediction = 0.54023438 * 2560; time = 0.7583s; samplesPerSecond = 3375.8 -MPI Rank 2: 05/03/2016 18:07:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.99199744 * 20480; EvalErrorPrediction = 0.54179687 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.98068s +MPI Rank 2: 05/03/2016 18:06:33: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 18:06:34: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.05711163 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 1.0657s; samplesPerSecond = 2402.1 +MPI Rank 2: 05/03/2016 18:06:34: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02925373 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 0.6026s; samplesPerSecond = 4248.6 +MPI Rank 2: 05/03/2016 18:06:36: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02826588 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 1.3490s; samplesPerSecond = 1897.8 +MPI Rank 2: 05/03/2016 18:06:37: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.97095883 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.7737s; samplesPerSecond = 3308.6 +MPI Rank 2: 05/03/2016 18:06:37: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.94550194 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 0.8267s; samplesPerSecond = 3096.8 +MPI Rank 2: 05/03/2016 18:06:39: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.01561798 * 2560; EvalErrorPrediction = 0.54414063 * 2560; time = 1.1229s; samplesPerSecond = 2279.9 +MPI Rank 2: 05/03/2016 18:06:39: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94069853 * 2560; EvalErrorPrediction = 0.52500000 * 2560; time = 0.9351s; samplesPerSecond = 2737.7 +MPI Rank 2: 05/03/2016 18:06:40: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.94857102 * 2560; EvalErrorPrediction = 0.54023438 * 2560; time = 0.6565s; samplesPerSecond = 3899.2 +MPI Rank 2: 05/03/2016 18:06:40: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.99199744 * 20480; EvalErrorPrediction = 0.54179687 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.34262s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 18:06:40: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:23: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 18:07:25: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946156 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 1.8984s; samplesPerSecond = 5394.0 -MPI Rank 2: 05/03/2016 18:07:27: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066794 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 2.1353s; samplesPerSecond = 4795.6 -MPI Rank 2: 05/03/2016 18:07:27: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506475 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.06107s -MPI Rank 2: 05/03/2016 18:07:27: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 18:06:40: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 18:06:43: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946156 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 2.3350s; samplesPerSecond = 4385.5 +MPI Rank 2: 05/03/2016 18:06:45: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066794 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 2.4055s; samplesPerSecond = 4256.9 +MPI Rank 2: 05/03/2016 18:06:45: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506475 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.75821s +MPI Rank 2: 05/03/2016 18:06:45: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:27: Action "train" complete. +MPI Rank 2: 05/03/2016 18:06:45: Action "train" complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:27: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 18:06:45: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.gpu.txt index f959959fc..1b2025133 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.gpu.txt @@ -1,4 +1,4 @@ -=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr ------------------------------------------------------------------- Build info: @@ -35,6 +35,8 @@ Build info: ------------------------------------------------------------------- Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPIWrapper: initializing MPI +Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data +MPIWrapper: initializing MPI ------------------------------------------------------------------- Build info: @@ -54,10 +56,8 @@ Build info: ------------------------------------------------------------------- Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPIWrapper: initializing MPI -Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPIWrapper: initializing MPI -------------------------------------------------------------------------- -[[43390,1],1]: A high-performance Open MPI point-to-point messaging module +[[62876,1],1]: A high-performance Open MPI point-to-point messaging module was unable to find any relevant network interfaces: Module: OpenFabrics (openib) @@ -70,6 +70,13 @@ ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other +ping [requestnodes (after change)]: all 3 nodes responded +mpihelper: we are cog 0 in a gearbox of 3 +ping [mpihelper]: 3 nodes pinging each other +ping [mpihelper]: all 3 nodes responded +ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded @@ -83,20 +90,13 @@ ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 0 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: all 3 nodes responded -05/03/2016 18:07:28: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank0 -05/03/2016 18:07:28: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank1 -05/03/2016 18:07:29: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank2 
-[870bdeb651b9:10706] 2 more processes have sent help message help-mpi-btl-base.txt / btl:no-nics -[870bdeb651b9:10706] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages +05/03/2016 18:06:46: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank0 +05/03/2016 18:06:46: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank1 +05/03/2016 18:06:47: Redirecting stderr to file /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank2 +[870bdeb651b9:30000] 2 more processes have sent help message help-mpi-btl-base.txt / btl:no-nics +[870bdeb651b9:30000] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages -------------------------------------------------------------------------- -mpiexec has exited due to process rank 0 with PID 10708 on +mpiexec has exited due to process rank 0 with PID 30002 on node 870bdeb651b9 exiting improperly. There are three reasons this could occur: 1. this process did not call "init" before exiting, but others in @@ -119,32 +119,32 @@ terminated by signals sent by mpiexec (as reported here). You can avoid this message by specifying -quiet on the mpiexec command line. -------------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:07:28: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:07:28: Build info: +MPI Rank 0: 05/03/2016 18:06:46: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:06:46: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: Built time: May 3 2016 17:56:15 -MPI Rank 0: 05/03/2016 18:07:28: Last modified date: Tue May 3 11:36:22 2016 -MPI Rank 0: 05/03/2016 18:07:28: Build type: release -MPI Rank 0: 05/03/2016 18:07:28: Build target: GPU -MPI Rank 0: 05/03/2016 18:07:28: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 18:07:28: Math lib: acml -MPI Rank 0: 05/03/2016 18:07:28: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 0: 05/03/2016 18:07:28: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 0: 05/03/2016 18:07:28: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 0: 05/03/2016 18:07:28: Build Branch: HEAD -MPI Rank 0: 05/03/2016 18:07:28: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 0: 05/03/2016 18:07:28: Built by philly on 18750d26eb32 -MPI Rank 0: 05/03/2016 18:07:28: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 0: 05/03/2016 18:07:28: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 18:06:46: Built time: May 3 2016 17:56:15 +MPI Rank 0: 05/03/2016 18:06:46: Last modified date: Tue May 3 11:36:22 2016 +MPI Rank 0: 05/03/2016 18:06:46: Build type: release +MPI Rank 0: 05/03/2016 18:06:46: Build target: GPU +MPI Rank 0: 05/03/2016 18:06:46: With 1bit-SGD: no +MPI Rank 0: 05/03/2016 18:06:46: Math lib: acml +MPI Rank 0: 05/03/2016 18:06:46: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 0: 05/03/2016 18:06:46: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 0: 05/03/2016 18:06:46: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 0: 05/03/2016 18:06:46: Build Branch: HEAD +MPI Rank 0: 05/03/2016 18:06:46: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 0: 05/03/2016 18:06:46: Built by philly on 18750d26eb32 +MPI Rank 0: 05/03/2016 18:06:46: Build Path: 
/home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 0: 05/03/2016 18:06:46: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: Running on localhost at 2016/05/03 18:07:28 -MPI Rank 0: 05/03/2016 18:07:28: Command line: -MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 0: 05/03/2016 18:06:46: Running on localhost at 2016/05/03 18:06:46 +MPI Rank 0: 05/03/2016 18:06:46: Command line: +MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:07:28: precision = "float" +MPI Rank 0: 05/03/2016 18:06:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:06:46: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -234,27 +234,25 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. +MPI Rank 0: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=1 -MPI Rank 0: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:06:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:07:28: precision = "float" +MPI Rank 0: 05/03/2016 18:06:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:06:46: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = 0 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -338,33 +336,31 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 0: 
RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. +MPI Rank 0: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=1 -MPI Rank 0: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:06:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 18:06:46: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN +MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
MPI Rank 0: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 0: configparameters: cntk.cntk:deviceId=0 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=float -MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -446,35 +442,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 0: ] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 18:07:28: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 18:07:28: Commands: speechTrain -MPI Rank 0: 05/03/2016 18:07:28: Precision = "float" -MPI Rank 0: 05/03/2016 18:07:28: Using 1 CPU threads. -MPI Rank 0: 05/03/2016 18:07:28: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 18:07:28: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 0: 05/03/2016 18:07:28: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 0: 05/03/2016 18:06:46: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 18:06:46: Commands: speechTrain +MPI Rank 0: 05/03/2016 18:06:46: Precision = "float" +MPI Rank 0: 05/03/2016 18:06:46: Using 1 CPU threads. 
+MPI Rank 0: 05/03/2016 18:06:46: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 18:06:46: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 0: 05/03/2016 18:06:46: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: ############################################################################## -MPI Rank 0: 05/03/2016 18:07:28: # # -MPI Rank 0: 05/03/2016 18:07:28: # Action "train" # -MPI Rank 0: 05/03/2016 18:07:28: # # -MPI Rank 0: 05/03/2016 18:07:28: ############################################################################## +MPI Rank 0: 05/03/2016 18:06:46: ############################################################################## +MPI Rank 0: 05/03/2016 18:06:46: # # +MPI Rank 0: 05/03/2016 18:06:46: # Action "train" # +MPI Rank 0: 05/03/2016 18:06:46: # # +MPI Rank 0: 05/03/2016 18:06:46: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 18:06:46: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 0: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: Creating virgin network. +MPI Rank 0: 05/03/2016 18:06:46: Creating virgin network. MPI Rank 0: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 MPI Rank 0: MPI Rank 0: Post-processing network... @@ -527,14 +522,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: Created model with 25 nodes on GPU 0. +MPI Rank 0: 05/03/2016 18:06:46: Created model with 25 nodes on GPU 0. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: Training criterion node(s): -MPI Rank 0: 05/03/2016 18:07:28: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 18:06:46: Training criterion node(s): +MPI Rank 0: 05/03/2016 18:06:46: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 18:06:46: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 18:06:46: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -542,135 +537,140 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0x1200088: {[features Value[363 x *]] } -MPI Rank 0: 0x7fd5b451e2b8: {[W1 Value[512 x 512]] } -MPI Rank 0: 0x7fd5b451f0e8: {[B1 Value[512 x 1]] } -MPI Rank 0: 0x7fd5b4520288: {[W2 Value[132 x 512]] } -MPI Rank 0: 0x7fd5b4520a08: {[B2 Value[132 x 1]] } -MPI Rank 0: 0x7fd5b4521d68: {[labels Value[132 x *]] } -MPI Rank 0: 0x7fd5b4522fc8: {[Prior Value[132]] } -MPI Rank 0: 0x7fd5b4528b28: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0x7fd5b4528c88: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0x7fd5b4528e48: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0x7fd5b4529388: {[LogOfPrior Value[132]] } -MPI Rank 0: 0x7fd5b452ad08: {[W0*features Value[512 x *]] } -MPI Rank 0: 0x7fd5b452ae58: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0x7fd5b452b238: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0x7fd5b452b398: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0x7fd5b452b4f8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0x7fd5b452b658: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 0x7fd5b452b818: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0x7fd5b452b9d8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0x7fd5b452bb98: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0x7fd5b452c728: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0x7fd5b452c8e8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0x7fd5b452caa8: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0x7fd5b452cc68: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 0x7fd5b4ed26d8: {[W0 Value[512 x 363]] } -MPI Rank 0: 0x7fd5b4ef0488: {[B0 Value[512 x 1]] } -MPI Rank 0: 0x7fd5b5b8ace8: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0x7fd5b5b9a4a8: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 0x1368538: {[features Value[363 x *]] } +MPI Rank 0: 0x1a1f918: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 0x1a20608: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 0x1a21348: {[W0 Value[512 x 363]] } +MPI Rank 0: 0x1f22418: {[B0 Value[512 x 1]] } +MPI Rank 0: 0x224efe8: {[W1 Value[512 x 512]] } +MPI Rank 0: 0x224fdf8: {[B1 Value[512 x 1]] } +MPI Rank 0: 0x2250f98: {[W2 Value[132 x 512]] } +MPI Rank 0: 
0x2251718: {[B2 Value[132 x 1]] } +MPI Rank 0: 0x2252a78: {[labels Value[132 x *]] } +MPI Rank 0: 0x2253cd8: {[Prior Value[132]] } +MPI Rank 0: 0x7fd719c1ed78: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 0x7fd719c1ef38: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 0x7fd719c1f0f8: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 0x7fd719c20888: {[LogOfPrior Value[132]] } +MPI Rank 0: 0x7fd719c21c98: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 0x7fd719c22398: {[W0*features Value[512 x *]] } +MPI Rank 0: 0x7fd719c228c8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 0x7fd719c22a28: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 0x7fd719c22be8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 0x7fd719c22da8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 0x7fd719c22f68: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 0x7fd719c23128: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 0x7fd719c232e8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 0x7fd719c23e48: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 0x7fd719c24008: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 0x7fd719c241c8: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 0x7fd719c24388: {[B2 Gradient[132 x 1]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 18:06:46: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:28: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 18:07:28: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 18:07:28: Prior = Mean() +MPI Rank 0: 05/03/2016 18:06:46: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 18:06:46: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 18:06:46: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:31: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 18:06:48: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 18:06:49: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.32135292 * 640; EvalErrorPrediction = 0.90000000 * 640; time = 0.2385s; samplesPerSecond = 2683.2 -MPI Rank 0: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15070941 * 640; EvalErrorPrediction = 0.86718750 * 640; time = 0.1236s; samplesPerSecond = 5179.6 -MPI Rank 0: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99901060 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1233s; samplesPerSecond = 5192.5 -MPI Rank 0: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86945819 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1224s; samplesPerSecond = 5227.7 -MPI Rank 0: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80219554 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1220s; samplesPerSecond = 5244.7 -MPI Rank 0: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72890764 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1233s; samplesPerSecond = 5189.7 -MPI Rank 0: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.56187066 * 640; EvalErrorPrediction = 0.82343750 * 640; time = 0.1230s; samplesPerSecond = 5202.3 -MPI Rank 0: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42790300 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.1213s; samplesPerSecond = 5277.8 -MPI Rank 0: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.33928335 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1230s; samplesPerSecond = 5201.9 -MPI Rank 0: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36398779 * 640; EvalErrorPrediction = 0.84375000 * 640; time = 0.1229s; samplesPerSecond = 5209.6 -MPI Rank 0: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21223692 * 640; EvalErrorPrediction = 0.75312500 * 640; time = 0.1228s; samplesPerSecond = 5211.6 -MPI Rank 0: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31265356 * 640; EvalErrorPrediction = 0.78750000 * 640; time = 0.1226s; samplesPerSecond = 5220.1 -MPI Rank 0: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14081692 * 640; EvalErrorPrediction = 0.74687500 * 640; time = 0.1226s; samplesPerSecond = 5219.8 -MPI Rank 0: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.00690026 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1221s; samplesPerSecond = 5240.4 -MPI Rank 0: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.00496067 * 640; EvalErrorPrediction = 0.72343750 * 640; time = 0.1227s; samplesPerSecond = 5214.4 -MPI Rank 0: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.97859075 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.1225s; samplesPerSecond = 5224.3 -MPI Rank 0: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.85686598 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.1224s; samplesPerSecond = 5227.8 -MPI Rank 0: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.69053367 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.1236s; samplesPerSecond = 5176.4 -MPI Rank 0: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.78653380 * 640; EvalErrorPrediction = 0.70468750 * 640; time = 0.1215s; samplesPerSecond = 5268.8 -MPI Rank 0: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.57702533 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1226s; samplesPerSecond = 5219.3 -MPI Rank 0: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.61570798 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.1261s; samplesPerSecond = 5076.8 -MPI Rank 0: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.55235583 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.1260s; samplesPerSecond = 5080.5 -MPI Rank 0: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48211149 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.1225s; samplesPerSecond = 5225.4 -MPI Rank 0: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.38778372 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.1219s; samplesPerSecond = 5248.5 -MPI Rank 0: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.36900907 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1227s; samplesPerSecond = 5217.8 -MPI Rank 0: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.43967781 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1224s; samplesPerSecond = 5227.1 -MPI Rank 0: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.30281044 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.1213s; samplesPerSecond = 5278.1 -MPI Rank 0: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.19669152 * 640; EvalErrorPrediction = 0.55937500 * 640; time = 0.1228s; samplesPerSecond = 5211.0 -MPI Rank 0: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.28979576 * 640; EvalErrorPrediction = 0.60468750 * 640; time = 0.1236s; samplesPerSecond = 5176.3 -MPI Rank 0: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.17750535 * 640; EvalErrorPrediction = 0.62187500 * 640; time = 0.1225s; samplesPerSecond = 5225.6 -MPI Rank 0: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.26264396 * 640; EvalErrorPrediction = 0.59687500 * 640; time = 0.1228s; samplesPerSecond = 5211.5 -MPI Rank 0: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.15073108 * 640; EvalErrorPrediction = 0.56250000 * 640; time = 0.1231s; samplesPerSecond = 5199.3 -MPI Rank 0: 05/03/2016 18:07:36: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.97995691 * 20480; EvalErrorPrediction = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=4.14607s -MPI Rank 0: 05/03/2016 18:07:36: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 18:06:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, 
NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.32135292 * 640; EvalErrorPrediction = 0.90000000 * 640; time = 0.1356s; samplesPerSecond = 4721.2 +MPI Rank 0: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15070941 * 640; EvalErrorPrediction = 0.86718750 * 640; time = 0.1255s; samplesPerSecond = 5100.8 +MPI Rank 0: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99901060 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1235s; samplesPerSecond = 5180.8 +MPI Rank 0: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86945819 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1244s; samplesPerSecond = 5142.8 +MPI Rank 0: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80219554 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1269s; samplesPerSecond = 5042.0 +MPI Rank 0: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72890764 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1267s; samplesPerSecond = 5050.1 +MPI Rank 0: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.56187066 * 640; EvalErrorPrediction = 0.82343750 * 640; time = 0.1237s; samplesPerSecond = 5172.3 +MPI Rank 0: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42790300 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.1220s; samplesPerSecond = 5244.6 +MPI Rank 0: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.33928335 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1232s; samplesPerSecond = 5195.8 +MPI Rank 0: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36398779 * 640; EvalErrorPrediction = 0.84375000 * 640; time = 0.1252s; samplesPerSecond = 5113.5 +MPI Rank 0: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21223692 * 640; EvalErrorPrediction = 0.75312500 * 640; time = 0.1242s; samplesPerSecond = 5152.4 +MPI Rank 0: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31265356 * 640; EvalErrorPrediction = 0.78750000 * 640; time = 0.1241s; samplesPerSecond = 5156.7 +MPI Rank 0: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14081692 * 640; EvalErrorPrediction = 0.74687500 * 640; time = 0.1329s; samplesPerSecond = 4815.4 +MPI Rank 0: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.00690026 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1241s; samplesPerSecond = 5156.5 +MPI Rank 0: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.00496067 * 640; EvalErrorPrediction = 0.72343750 * 640; time = 0.1237s; samplesPerSecond = 5173.8 +MPI Rank 0: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.97859075 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.1237s; samplesPerSecond = 5175.6 +MPI Rank 0: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.85686598 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.1238s; samplesPerSecond = 5168.2 +MPI Rank 0: 05/03/2016 18:06:51: 
Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.69053367 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.1242s; samplesPerSecond = 5151.8 +MPI Rank 0: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.78653380 * 640; EvalErrorPrediction = 0.70468750 * 640; time = 0.1221s; samplesPerSecond = 5242.6 +MPI Rank 0: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.57702533 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1235s; samplesPerSecond = 5183.7 +MPI Rank 0: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.61570798 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.1232s; samplesPerSecond = 5194.7 +MPI Rank 0: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.55235583 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.1250s; samplesPerSecond = 5118.1 +MPI Rank 0: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48211149 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.1242s; samplesPerSecond = 5150.9 +MPI Rank 0: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.38778372 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.1242s; samplesPerSecond = 5154.2 +MPI Rank 0: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.36900907 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1241s; samplesPerSecond = 5156.8 +MPI Rank 0: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.43967781 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1245s; samplesPerSecond = 5141.8 +MPI Rank 0: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.30281044 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.1237s; samplesPerSecond = 5173.5 +MPI Rank 0: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.19669152 * 640; EvalErrorPrediction = 0.55937500 * 640; time = 0.1247s; samplesPerSecond = 5133.1 +MPI Rank 0: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.28979576 * 640; EvalErrorPrediction = 0.60468750 * 640; time = 0.1239s; samplesPerSecond = 5165.4 +MPI Rank 0: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.17750535 * 640; EvalErrorPrediction = 0.62187500 * 640; time = 0.1236s; samplesPerSecond = 5178.9 +MPI Rank 0: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.26264396 * 640; EvalErrorPrediction = 0.59687500 * 640; time = 0.1239s; samplesPerSecond = 5164.6 +MPI Rank 0: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.15073108 * 640; EvalErrorPrediction = 0.56250000 * 640; time = 0.1243s; samplesPerSecond = 5150.7 +MPI Rank 0: 05/03/2016 18:06:53: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.97995691 * 20480; EvalErrorPrediction = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=4.00579s +MPI Rank 0: 05/03/2016 18:06:53: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:36: Starting Epoch 2: learning rate per sample = 0.001953 
effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 18:06:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:36: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 05/03/2016 18:07:36: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.01598513 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1432s; samplesPerSecond = 17874.2 -MPI Rank 0: 05/03/2016 18:07:36: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98818589 * 2560; EvalErrorPrediction = 0.54296875 * 2560; time = 0.1356s; samplesPerSecond = 18880.3 -MPI Rank 0: 05/03/2016 18:07:36: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98698117 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1362s; samplesPerSecond = 18795.6 -MPI Rank 0: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93126300 * 2560; EvalErrorPrediction = 0.52773437 * 2560; time = 0.1345s; samplesPerSecond = 19031.8 -MPI Rank 0: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90067741 * 2560; EvalErrorPrediction = 0.52656250 * 2560; time = 0.1347s; samplesPerSecond = 19008.0 -MPI Rank 0: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97115807 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1342s; samplesPerSecond = 19071.9 -MPI Rank 0: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89518062 * 2560; EvalErrorPrediction = 0.52031250 * 2560; time = 0.1340s; samplesPerSecond = 19100.2 -MPI Rank 0: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.90450399 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.1358s; samplesPerSecond = 18856.1 -MPI Rank 0: 05/03/2016 18:07:37: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.94924191 * 20480; EvalErrorPrediction = 0.53417969 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.09921s -MPI Rank 0: 05/03/2016 18:07:37: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn.2' +MPI Rank 0: 05/03/2016 18:06:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 0: 05/03/2016 18:06:53: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.01598513 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1445s; samplesPerSecond = 17721.8 +MPI Rank 0: 05/03/2016 18:06:53: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98818589 * 2560; EvalErrorPrediction = 0.54296875 * 2560; time = 0.1363s; samplesPerSecond = 18784.0 +MPI Rank 0: 05/03/2016 18:06:53: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98698117 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1373s; samplesPerSecond = 18641.4 +MPI Rank 0: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93126300 * 2560; EvalErrorPrediction = 0.52773437 * 2560; time = 0.1360s; samplesPerSecond = 18830.3 +MPI Rank 0: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90067741 * 2560; EvalErrorPrediction = 0.52656250 * 2560; time = 0.1352s; samplesPerSecond = 18932.0 +MPI Rank 0: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97115807 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1350s; samplesPerSecond = 18957.1 +MPI Rank 0: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89518062 * 2560; EvalErrorPrediction = 0.52031250 * 2560; time = 0.1352s; samplesPerSecond = 18931.3 +MPI Rank 0: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.90450399 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.1362s; samplesPerSecond = 18791.9 +MPI Rank 0: 05/03/2016 18:06:54: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.94924191 * 20480; EvalErrorPrediction = 0.53417969 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.10678s +MPI Rank 0: 05/03/2016 18:06:54: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:37: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 18:06:54: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:37: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 18:07:37: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359847 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.2058s; samplesPerSecond = 49749.1 -MPI Rank 0: 05/03/2016 18:07:38: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656269 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.2041s; samplesPerSecond = 50176.2 -MPI Rank 0: 05/03/2016 18:07:38: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008058 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.421912s -MPI Rank 0: 05/03/2016 18:07:38: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 18:07:38: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 18:06:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 18:06:54: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359847 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.2103s; samplesPerSecond = 48701.1 +MPI Rank 0: 05/03/2016 18:06:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656269 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.2101s; samplesPerSecond = 48731.5 +MPI Rank 0: 05/03/2016 18:06:55: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008058 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.433267s +MPI Rank 0: 05/03/2016 18:06:55: SGD: Saving checkpoint model '/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 18:06:55: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:38: Action "train" complete. +MPI Rank 0: 05/03/2016 18:06:55: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 18:07:38: __COMPLETED__ -MPI Rank 1: 05/03/2016 18:07:28: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 18:07:28: Build info: +MPI Rank 0: 05/03/2016 18:06:55: __COMPLETED__ +MPI Rank 1: 05/03/2016 18:06:46: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:06:46: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: Built time: May 3 2016 17:56:15 -MPI Rank 1: 05/03/2016 18:07:28: Last modified date: Tue May 3 11:36:22 2016 -MPI Rank 1: 05/03/2016 18:07:28: Build type: release -MPI Rank 1: 05/03/2016 18:07:28: Build target: GPU -MPI Rank 1: 05/03/2016 18:07:28: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 18:07:28: Math lib: acml -MPI Rank 1: 05/03/2016 18:07:28: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 1: 05/03/2016 18:07:28: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 1: 05/03/2016 18:07:28: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 1: 05/03/2016 18:07:28: Build Branch: HEAD -MPI Rank 1: 05/03/2016 18:07:28: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 1: 05/03/2016 18:07:28: Built by philly on 18750d26eb32 -MPI Rank 1: 05/03/2016 18:07:28: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 1: 05/03/2016 18:07:28: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 18:06:46: Built time: May 3 2016 17:56:15 +MPI Rank 1: 05/03/2016 18:06:46: Last modified date: Tue May 3 11:36:22 2016 +MPI Rank 1: 05/03/2016 18:06:46: Build type: release +MPI Rank 1: 05/03/2016 18:06:46: Build target: GPU +MPI Rank 1: 05/03/2016 18:06:46: With 1bit-SGD: no +MPI Rank 1: 05/03/2016 18:06:46: Math lib: acml +MPI Rank 1: 05/03/2016 18:06:46: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 1: 05/03/2016 18:06:46: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 1: 05/03/2016 18:06:46: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 1: 05/03/2016 18:06:46: Build Branch: HEAD +MPI Rank 1: 05/03/2016 18:06:46: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 1: 05/03/2016 18:06:46: Built by philly on 18750d26eb32 +MPI Rank 1: 05/03/2016 18:06:46: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 1: 05/03/2016 18:06:46: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: Running on localhost at 2016/05/03 18:07:28 -MPI Rank 1: 05/03/2016 18:07:28: Command line: -MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 
1: 05/03/2016 18:06:46: Running on localhost at 2016/05/03 18:06:46 +MPI Rank 1: 05/03/2016 18:06:46: Command line: +MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:07:28: precision = "float" +MPI Rank 1: 05/03/2016 18:06:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:06:46: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -760,27 +760,25 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=1 -MPI Rank 1: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:06:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:07:28: precision = "float" +MPI Rank 1: 05/03/2016 18:06:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:06:46: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = 0 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -864,33 +862,31 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
+MPI Rank 1: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=1 -MPI Rank 1: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:06:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 18:06:46: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN +MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. MPI Rank 1: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 1: configparameters: cntk.cntk:deviceId=0 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=float -MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -972,35 +968,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 1: ] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 1: configparameters: 
cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 18:07:28: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 18:07:28: Commands: speechTrain -MPI Rank 1: 05/03/2016 18:07:28: Precision = "float" -MPI Rank 1: 05/03/2016 18:07:28: Using 1 CPU threads. -MPI Rank 1: 05/03/2016 18:07:28: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 18:07:28: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 1: 05/03/2016 18:07:28: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 1: 05/03/2016 18:06:46: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 18:06:46: Commands: speechTrain +MPI Rank 1: 05/03/2016 18:06:46: Precision = "float" +MPI Rank 1: 05/03/2016 18:06:46: Using 1 CPU threads. +MPI Rank 1: 05/03/2016 18:06:46: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 18:06:46: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 1: 05/03/2016 18:06:46: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: ############################################################################## -MPI Rank 1: 05/03/2016 18:07:28: # # -MPI Rank 1: 05/03/2016 18:07:28: # Action "train" # -MPI Rank 1: 05/03/2016 18:07:28: # # -MPI Rank 1: 05/03/2016 18:07:28: ############################################################################## +MPI Rank 1: 05/03/2016 18:06:46: ############################################################################## +MPI Rank 1: 05/03/2016 18:06:46: # # +MPI Rank 1: 05/03/2016 18:06:46: # Action "train" # +MPI Rank 1: 05/03/2016 18:06:46: # # +MPI Rank 1: 05/03/2016 18:06:46: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 18:06:46: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 1: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: Creating virgin network. +MPI Rank 1: 05/03/2016 18:06:46: Creating virgin network. 
MPI Rank 1: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 MPI Rank 1: MPI Rank 1: Post-processing network... @@ -1053,14 +1048,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: Created model with 25 nodes on GPU 0. +MPI Rank 1: 05/03/2016 18:06:46: Created model with 25 nodes on GPU 0. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: Training criterion node(s): -MPI Rank 1: 05/03/2016 18:07:28: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 18:06:46: Training criterion node(s): +MPI Rank 1: 05/03/2016 18:06:46: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 18:06:46: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 18:06:46: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1068,132 +1063,137 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 0x1b2dfd8: {[features Value[363 x *]] } -MPI Rank 1: 0x1e68fa8: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 0x1e694b8: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 0x1e6a1a8: {[W0 Value[512 x 363]] } -MPI Rank 1: 0x26bbac8: {[B0 Value[512 x 1]] } -MPI Rank 1: 0x29e8698: {[W1 Value[512 x 512]] } -MPI Rank 1: 0x29e94a8: {[B1 Value[512 x 1]] } -MPI Rank 1: 0x29ea618: {[W2 Value[132 x 512]] } -MPI Rank 1: 0x29ead98: {[B2 Value[132 x 1]] } -MPI Rank 1: 0x29ec118: {[labels Value[132 x *]] } -MPI Rank 1: 0x29ed378: {[Prior Value[132]] } -MPI Rank 1: 0x29f2ae8: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 0x29f2c48: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 0x29f2da8: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 0x29f3388: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 0x29f3578: {[LogOfPrior Value[132]] } -MPI Rank 1: 0x29f5088: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 0x29f5788: {[W0*features Value[512 x *]] } -MPI Rank 1: 0x29f5998: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 0x29f5af8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 0x29f5c58: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 0x29f5e18: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 0x29f5fd8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 0x29f6198: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 0x29f6cf8: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 0x29f6eb8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 0x29f7078: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 0x29f7238: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 0x2d2d0a8: {[features Value[363 x *]] } +MPI Rank 1: 0x7f386fd47208: 
{[LogOfPrior Value[132]] } +MPI Rank 1: 0x7f386fd483c8: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 0x7f386fd48ac8: {[W0*features Value[512 x *]] } +MPI Rank 1: 0x7f386fd48ff8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 0x7f386fd49158: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 0x7f386fd49318: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 0x7f386fd494d8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 0x7f386fd49698: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 0x7f386fd49858: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 0x7f386fd49a18: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 0x7f386fd4a578: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 0x7f386fd4a738: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 0x7f386fd4a8f8: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 0x7f386fd4aab8: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 0x7f387047a438: {[W1 Value[512 x 512]] } +MPI Rank 1: 0x7f387047b208: {[B1 Value[512 x 1]] } +MPI Rank 1: 0x7f387047c3a8: {[W2 Value[132 x 512]] } +MPI Rank 1: 0x7f387047cb28: {[B2 Value[132 x 1]] } +MPI Rank 1: 0x7f387047de88: {[labels Value[132 x *]] } +MPI Rank 1: 0x7f387047f0e8: {[Prior Value[132]] } +MPI Rank 1: 0x7f3870484988: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 0x7f3870484c88: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 0x7f3870484e48: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 0x7f387048b6b8: {[B0 Value[512 x 1]] } +MPI Rank 1: 0x7f38706e86c8: {[W0 Value[512 x 363]] } +MPI Rank 1: 0x7f38706f62d8: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 0x7f38706f67e8: {[InvStdOfFeatures Value[363]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 18:06:46: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:28: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 18:07:28: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 18:07:28: Prior = Mean() +MPI Rank 1: 05/03/2016 18:06:46: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 18:06:46: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 18:06:46: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:31: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 18:06:49: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 18:06:49: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 1: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.32135292 * 640; EvalErrorPrediction = 0.90000000 * 640; time = 0.2378s; samplesPerSecond = 2691.1 -MPI Rank 1: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15070941 * 640; EvalErrorPrediction = 0.86718750 * 640; time = 0.1235s; samplesPerSecond = 5181.9 -MPI Rank 1: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99901060 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1232s; samplesPerSecond = 5195.1 -MPI Rank 1: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86945819 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1224s; samplesPerSecond = 5229.1 -MPI Rank 1: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80219554 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1220s; samplesPerSecond = 5247.6 -MPI Rank 1: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72890764 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1233s; samplesPerSecond = 5192.3 -MPI Rank 1: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.56187066 * 640; EvalErrorPrediction = 0.82343750 * 640; time = 0.1230s; samplesPerSecond = 5204.8 -MPI Rank 1: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42790300 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.1212s; samplesPerSecond = 5279.0 -MPI Rank 1: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.33928335 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1228s; samplesPerSecond = 5210.4 -MPI Rank 1: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36398779 * 640; EvalErrorPrediction = 0.84375000 * 640; time = 0.1229s; samplesPerSecond = 5207.0 -MPI Rank 1: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21223692 * 640; EvalErrorPrediction = 0.75312500 * 640; time = 0.1228s; samplesPerSecond = 5213.4 -MPI Rank 1: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31265356 * 640; EvalErrorPrediction = 0.78750000 * 640; time = 0.1225s; samplesPerSecond = 5223.6 -MPI Rank 1: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14081692 * 640; EvalErrorPrediction = 0.74687500 * 640; time = 0.1224s; samplesPerSecond = 5228.3 -MPI Rank 1: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.00690026 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1222s; samplesPerSecond = 5237.3 -MPI Rank 1: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.00496067 * 640; EvalErrorPrediction = 0.72343750 * 640; time = 0.1227s; samplesPerSecond = 5217.1 -MPI Rank 1: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.97859075 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.1224s; samplesPerSecond = 5227.2 -MPI Rank 1: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.85686598 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.1225s; samplesPerSecond = 5224.7 -MPI Rank 1: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.69053367 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.1234s; samplesPerSecond = 5185.4 -MPI Rank 1: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.78653380 * 640; EvalErrorPrediction = 0.70468750 * 640; time = 0.1214s; samplesPerSecond = 5270.1 -MPI Rank 1: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.57702533 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1225s; samplesPerSecond = 5222.4 -MPI Rank 1: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.61570798 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.1260s; samplesPerSecond = 5079.0 -MPI Rank 1: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.55235583 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.1259s; samplesPerSecond = 5082.6 -MPI Rank 1: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48211149 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.1224s; samplesPerSecond = 5228.5 -MPI Rank 1: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.38778372 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.1220s; samplesPerSecond = 5245.1 -MPI Rank 1: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.36900907 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1224s; samplesPerSecond = 5227.1 -MPI Rank 1: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.43967781 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1224s; samplesPerSecond = 5228.8 -MPI Rank 1: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.30281044 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.1213s; samplesPerSecond = 5275.0 -MPI Rank 1: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.19669152 * 640; EvalErrorPrediction = 0.55937500 * 640; time = 0.1226s; samplesPerSecond = 5218.8 -MPI Rank 1: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.28979576 * 640; EvalErrorPrediction = 0.60468750 * 640; time = 0.1236s; samplesPerSecond = 5177.6 -MPI Rank 1: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.17750535 * 640; EvalErrorPrediction = 0.62187500 * 640; time = 0.1224s; samplesPerSecond = 5229.5 -MPI Rank 1: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.26264396 * 640; EvalErrorPrediction = 0.59687500 * 640; time = 0.1227s; samplesPerSecond = 5214.6 -MPI Rank 1: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.15073108 * 640; EvalErrorPrediction = 0.56250000 * 640; time = 0.1231s; samplesPerSecond = 5201.1 -MPI Rank 1: 05/03/2016 18:07:36: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.97995691 * 20480; EvalErrorPrediction = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=4.14572s +MPI Rank 1: 05/03/2016 18:06:49: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.32135292 * 640; EvalErrorPrediction = 0.90000000 * 640; time = 0.1349s; samplesPerSecond = 4744.0 +MPI Rank 1: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15070941 * 640; EvalErrorPrediction = 0.86718750 * 640; time = 0.1254s; samplesPerSecond = 5103.3 +MPI Rank 1: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99901060 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1234s; samplesPerSecond = 5184.8 +MPI Rank 1: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86945819 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1244s; samplesPerSecond = 5143.7 +MPI Rank 1: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80219554 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1269s; samplesPerSecond = 5044.7 +MPI Rank 1: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72890764 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1266s; samplesPerSecond = 5055.1 +MPI Rank 1: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.56187066 * 640; EvalErrorPrediction = 0.82343750 * 640; time = 0.1237s; samplesPerSecond = 5172.6 +MPI Rank 1: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42790300 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.1220s; samplesPerSecond = 5247.2 +MPI Rank 1: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.33928335 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1231s; samplesPerSecond = 5198.7 +MPI Rank 1: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36398779 * 640; EvalErrorPrediction = 0.84375000 * 640; time = 0.1251s; samplesPerSecond = 5115.9 +MPI Rank 1: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21223692 * 640; EvalErrorPrediction = 0.75312500 * 640; time = 0.1241s; samplesPerSecond = 5155.3 +MPI Rank 1: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31265356 * 640; EvalErrorPrediction = 0.78750000 * 640; time = 0.1241s; samplesPerSecond = 5158.2 +MPI Rank 1: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14081692 * 640; EvalErrorPrediction = 0.74687500 * 640; time = 0.1328s; samplesPerSecond = 4819.7 +MPI Rank 1: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.00690026 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1241s; samplesPerSecond = 5157.2 +MPI Rank 1: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.00496067 * 640; EvalErrorPrediction = 0.72343750 * 640; time = 0.1237s; samplesPerSecond = 5175.8 +MPI Rank 1: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.97859075 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.1236s; samplesPerSecond = 5178.6 +MPI Rank 1: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.85686598 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.1238s; samplesPerSecond = 5170.6 +MPI Rank 1: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.69053367 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.1242s; samplesPerSecond = 5153.5 +MPI Rank 1: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.78653380 * 640; EvalErrorPrediction = 0.70468750 * 640; time = 0.1220s; samplesPerSecond = 5245.9 +MPI Rank 1: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.57702533 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1234s; samplesPerSecond = 5186.2 +MPI Rank 1: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.61570798 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.1231s; samplesPerSecond = 5197.0 +MPI Rank 1: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.55235583 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.1249s; samplesPerSecond = 5123.2 +MPI Rank 1: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48211149 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.1242s; samplesPerSecond = 5151.6 +MPI Rank 1: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.38778372 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.1241s; samplesPerSecond = 5156.6 +MPI Rank 1: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.36900907 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1240s; samplesPerSecond = 5159.5 +MPI Rank 1: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.43967781 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1244s; samplesPerSecond = 5144.6 +MPI Rank 1: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.30281044 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.1236s; samplesPerSecond = 5176.3 +MPI Rank 1: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.19669152 * 640; EvalErrorPrediction = 0.55937500 * 640; time = 0.1246s; samplesPerSecond = 5135.3 +MPI Rank 1: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.28979576 * 640; EvalErrorPrediction = 0.60468750 * 640; time = 0.1238s; samplesPerSecond = 5167.7 +MPI Rank 1: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.17750535 * 640; EvalErrorPrediction = 0.62187500 * 640; time = 0.1235s; samplesPerSecond = 5181.1 +MPI Rank 1: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.26264396 * 640; EvalErrorPrediction = 0.59687500 * 640; time = 0.1239s; samplesPerSecond = 5166.7 +MPI Rank 1: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.15073108 * 640; EvalErrorPrediction = 0.56250000 * 640; time = 0.1242s; samplesPerSecond = 5153.7 +MPI Rank 1: 05/03/2016 18:06:53: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.97995691 * 20480; EvalErrorPrediction = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=4.00547s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:36: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 18:06:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:36: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 18:07:36: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.01598513 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1427s; samplesPerSecond = 17935.6 -MPI Rank 1: 05/03/2016 18:07:36: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98818589 * 2560; EvalErrorPrediction = 0.54296875 * 2560; time = 0.1355s; samplesPerSecond = 18889.9 -MPI Rank 1: 05/03/2016 18:07:36: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98698117 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1362s; samplesPerSecond = 18801.5 -MPI Rank 1: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93126300 * 2560; EvalErrorPrediction = 0.52773437 * 2560; time = 0.1344s; samplesPerSecond = 19042.8 -MPI Rank 1: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90067741 * 2560; EvalErrorPrediction = 0.52656250 * 2560; time = 0.1345s; samplesPerSecond = 19034.9 -MPI Rank 1: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97115807 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1343s; samplesPerSecond = 19062.2 -MPI Rank 1: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89518062 * 2560; EvalErrorPrediction = 0.52031250 * 2560; time = 0.1340s; samplesPerSecond = 19109.3 -MPI Rank 1: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.90450399 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.1357s; samplesPerSecond = 18865.7 -MPI Rank 1: 05/03/2016 18:07:37: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.94924191 * 20480; EvalErrorPrediction = 0.53417969 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.09877s +MPI Rank 1: 05/03/2016 18:06:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 18:06:53: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.01598513 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1441s; samplesPerSecond = 17768.2 +MPI Rank 1: 05/03/2016 18:06:53: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98818589 * 2560; EvalErrorPrediction = 0.54296875 * 2560; time = 0.1362s; samplesPerSecond = 18790.0 +MPI Rank 1: 05/03/2016 18:06:53: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98698117 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1373s; samplesPerSecond = 18647.7 +MPI Rank 1: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93126300 * 2560; EvalErrorPrediction = 0.52773437 * 2560; time = 0.1359s; samplesPerSecond = 18838.9 +MPI Rank 1: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90067741 * 2560; EvalErrorPrediction = 0.52656250 * 2560; time = 0.1352s; samplesPerSecond = 18940.0 +MPI Rank 1: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97115807 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1350s; samplesPerSecond = 18963.5 +MPI Rank 1: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89518062 * 2560; EvalErrorPrediction = 0.52031250 * 2560; time = 0.1352s; samplesPerSecond = 18938.0 +MPI Rank 1: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.90450399 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.1362s; samplesPerSecond = 18798.8 +MPI Rank 1: 05/03/2016 18:06:54: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.94924191 * 20480; EvalErrorPrediction = 0.53417969 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.10634s MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:37: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 18:06:54: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:37: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 18:07:37: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359847 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.2053s; samplesPerSecond = 49872.2 -MPI Rank 1: 05/03/2016 18:07:38: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656269 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.2040s; samplesPerSecond = 50190.4 -MPI Rank 1: 05/03/2016 18:07:38: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008058 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.421476s -MPI Rank 1: 05/03/2016 18:07:38: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 18:06:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 18:06:54: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359847 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.2098s; samplesPerSecond = 48819.8 +MPI Rank 1: 05/03/2016 18:06:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656269 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.2101s; samplesPerSecond = 48738.0 +MPI Rank 1: 05/03/2016 18:06:55: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008058 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.43293s +MPI Rank 1: 05/03/2016 18:06:55: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:38: Action "train" complete. +MPI Rank 1: 05/03/2016 18:06:55: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:07:38: __COMPLETED__ -MPI Rank 2: 05/03/2016 18:07:29: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 18:07:29: Build info: +MPI Rank 1: 05/03/2016 18:06:55: __COMPLETED__ +MPI Rank 2: 05/03/2016 18:06:47: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:06:47: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: Built time: May 3 2016 17:56:15 -MPI Rank 2: 05/03/2016 18:07:29: Last modified date: Tue May 3 11:36:22 2016 -MPI Rank 2: 05/03/2016 18:07:29: Build type: release -MPI Rank 2: 05/03/2016 18:07:29: Build target: GPU -MPI Rank 2: 05/03/2016 18:07:29: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 18:07:29: Math lib: acml -MPI Rank 2: 05/03/2016 18:07:29: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 2: 05/03/2016 18:07:29: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 2: 05/03/2016 18:07:29: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 2: 05/03/2016 18:07:29: Build Branch: HEAD -MPI Rank 2: 05/03/2016 18:07:29: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 2: 05/03/2016 18:07:29: Built by philly on 18750d26eb32 -MPI Rank 2: 05/03/2016 18:07:29: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 2: 05/03/2016 18:07:29: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 18:06:47: Built time: May 3 2016 17:56:15 +MPI Rank 2: 05/03/2016 18:06:47: Last modified date: Tue May 3 11:36:22 2016 +MPI Rank 2: 05/03/2016 18:06:47: Build type: release +MPI Rank 2: 05/03/2016 18:06:47: Build target: GPU +MPI Rank 2: 05/03/2016 18:06:47: With 1bit-SGD: no +MPI Rank 2: 05/03/2016 18:06:47: Math lib: acml +MPI Rank 2: 05/03/2016 18:06:47: CUDA_PATH: /usr/local/cuda-7.5 +MPI Rank 2: 05/03/2016 18:06:47: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 2: 05/03/2016 18:06:47: CUDNN_PATH: /usr/local/cudnn-4.0 +MPI Rank 2: 05/03/2016 18:06:47: Build Branch: HEAD +MPI Rank 2: 05/03/2016 18:06:47: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +MPI Rank 2: 05/03/2016 18:06:47: Built by philly on 18750d26eb32 +MPI Rank 2: 05/03/2016 18:06:47: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +MPI Rank 2: 05/03/2016 18:06:47: ------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: Running on localhost at 2016/05/03 18:07:29 -MPI Rank 2: 05/03/2016 18:07:29: Command line: -MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk 
configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 2: 05/03/2016 18:06:47: Running on localhost at 2016/05/03 18:06:47 +MPI Rank 2: 05/03/2016 18:06:47: Command line: +MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/../cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:07:29: precision = "float" +MPI Rank 2: 05/03/2016 18:06:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:06:47: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1283,27 +1283,25 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=1 -MPI Rank 2: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:06:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:07:29: precision = "float" +MPI Rank 2: 05/03/2016 18:06:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:06:47: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = 0 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1387,33 +1385,31 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. 
+MPI Rank 2: OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=1 -MPI Rank 2: stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:06:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 18:06:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/../../../DNN +MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/.. MPI Rank 2: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data MPI Rank 2: configparameters: cntk.cntk:deviceId=0 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=float -MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1495,35 +1491,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 2: ] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 2: configparameters: 
cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 18:07:29: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 18:07:29: Commands: speechTrain -MPI Rank 2: 05/03/2016 18:07:29: Precision = "float" -MPI Rank 2: 05/03/2016 18:07:29: Using 1 CPU threads. -MPI Rank 2: 05/03/2016 18:07:29: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 18:07:29: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 2: 05/03/2016 18:07:29: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 2: 05/03/2016 18:06:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 18:06:47: Commands: speechTrain +MPI Rank 2: 05/03/2016 18:06:47: Precision = "float" +MPI Rank 2: 05/03/2016 18:06:47: Using 1 CPU threads. +MPI Rank 2: 05/03/2016 18:06:47: CNTKModelPath: /tmp/cntk-test-20160503175932.483858/Speech/DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 18:06:47: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 2: 05/03/2016 18:06:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: ############################################################################## -MPI Rank 2: 05/03/2016 18:07:29: # # -MPI Rank 2: 05/03/2016 18:07:29: # Action "train" # -MPI Rank 2: 05/03/2016 18:07:29: # # -MPI Rank 2: 05/03/2016 18:07:29: ############################################################################## +MPI Rank 2: 05/03/2016 18:06:47: ############################################################################## +MPI Rank 2: 05/03/2016 18:06:47: # # +MPI Rank 2: 05/03/2016 18:06:47: # Action "train" # +MPI Rank 2: 05/03/2016 18:06:47: # # +MPI Rank 2: 05/03/2016 18:06:47: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 18:06:47: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/state.list MPI Rank 2: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W2/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: Creating virgin network. +MPI Rank 2: 05/03/2016 18:06:47: Creating virgin network. 
MPI Rank 2: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 MPI Rank 2: MPI Rank 2: Post-processing network... @@ -1576,14 +1571,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: Created model with 25 nodes on GPU 0. +MPI Rank 2: 05/03/2016 18:06:47: Created model with 25 nodes on GPU 0. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: Training criterion node(s): -MPI Rank 2: 05/03/2016 18:07:29: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 18:06:47: Training criterion node(s): +MPI Rank 2: 05/03/2016 18:06:47: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 18:06:47: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 18:06:47: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1591,103 +1586,108 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0x1e72a78: {[features Value[363 x *]] } -MPI Rank 2: 0x1f83298: {[B0 Value[512 x 1]] } -MPI Rank 2: 0x25fdc88: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 0x25fee48: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 0x25ffb88: {[W0 Value[512 x 363]] } -MPI Rank 2: 0x2d65d18: {[W1 Value[512 x 512]] } -MPI Rank 2: 0x2d66b28: {[B1 Value[512 x 1]] } -MPI Rank 2: 0x2d67cc8: {[W2 Value[132 x 512]] } -MPI Rank 2: 0x2d68448: {[B2 Value[132 x 1]] } -MPI Rank 2: 0x2d697a8: {[labels Value[132 x *]] } -MPI Rank 2: 0x2d6aa08: {[Prior Value[132]] } -MPI Rank 2: 0x2d70178: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 0x2d702d8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 0x2d70438: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 0x2d70a18: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 0x2d70c08: {[LogOfPrior Value[132]] } -MPI Rank 2: 0x2d72718: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 0x2d72e18: {[W0*features Value[512 x *]] } -MPI Rank 2: 0x2d73028: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 0x2d73188: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 0x2d732e8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 0x2d734a8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 0x2d73668: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 0x2d73828: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 0x2d74388: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 0x2d74548: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 0x2d74708: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 0x2d748c8: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 0x16d13b8: {[features Value[363 x *]] } +MPI Rank 2: 0x1c81f78: {[W0 
Value[512 x 363]] } +MPI Rank 2: 0x1c90ba8: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 0x1c916f8: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 0x21d7578: {[W1 Value[512 x 512]] } +MPI Rank 2: 0x21d8348: {[B1 Value[512 x 1]] } +MPI Rank 2: 0x21d94e8: {[W2 Value[132 x 512]] } +MPI Rank 2: 0x21d9c68: {[B2 Value[132 x 1]] } +MPI Rank 2: 0x21dafc8: {[labels Value[132 x *]] } +MPI Rank 2: 0x21dc228: {[Prior Value[132]] } +MPI Rank 2: 0x21e1ac8: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 0x21e1dc8: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 0x21e1f88: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 0x21e2588: {[LogOfPrior Value[132]] } +MPI Rank 2: 0x21e89a8: {[B0 Value[512 x 1]] } +MPI Rank 2: 0x24a33c8: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 0x24a3ac8: {[W0*features Value[512 x *]] } +MPI Rank 2: 0x24a3ff8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 0x24a4158: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 0x24a4318: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 0x24a44d8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 0x24a4698: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 0x24a4858: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 0x24a4a18: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 0x24a5578: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 0x24a5738: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 0x24a58f8: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 0x24a5ab8: {[B2 Gradient[132 x 1]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 18:06:47: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:29: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 18:07:29: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 18:07:29: Prior = Mean() +MPI Rank 2: 05/03/2016 18:06:47: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 18:06:47: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 18:06:47: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:32: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 18:06:49: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 18:06:49: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:32: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 2: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.32135292 * 640; EvalErrorPrediction = 0.90000000 * 640; time = 0.2198s; samplesPerSecond = 2911.6 -MPI Rank 2: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15070941 * 640; EvalErrorPrediction = 0.86718750 * 640; time = 0.1236s; samplesPerSecond = 5179.6 -MPI Rank 2: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99901060 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1232s; samplesPerSecond = 5193.2 -MPI Rank 2: 05/03/2016 18:07:32: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86945819 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1224s; samplesPerSecond = 5227.4 -MPI Rank 2: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80219554 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1220s; samplesPerSecond = 5244.9 -MPI Rank 2: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72890764 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1233s; samplesPerSecond = 5190.1 -MPI Rank 2: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.56187066 * 640; EvalErrorPrediction = 0.82343750 * 640; time = 0.1230s; samplesPerSecond = 5201.5 -MPI Rank 2: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42790300 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.1212s; samplesPerSecond = 5278.4 -MPI Rank 2: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.33928335 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1229s; samplesPerSecond = 5207.7 -MPI Rank 2: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36398779 * 640; EvalErrorPrediction = 0.84375000 * 640; time = 0.1230s; samplesPerSecond = 5204.6 -MPI Rank 2: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21223692 * 640; EvalErrorPrediction = 0.75312500 * 640; time = 0.1228s; samplesPerSecond = 5211.7 -MPI Rank 2: 05/03/2016 18:07:33: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31265356 * 640; EvalErrorPrediction = 0.78750000 * 640; time = 0.1226s; samplesPerSecond = 5220.1 -MPI Rank 2: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14081692 * 640; EvalErrorPrediction = 0.74687500 * 640; time = 0.1225s; samplesPerSecond = 5226.2 -MPI Rank 2: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.00690026 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1223s; samplesPerSecond = 5234.5 -MPI Rank 2: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.00496067 * 640; EvalErrorPrediction = 0.72343750 * 640; time = 0.1227s; samplesPerSecond = 5215.3 -MPI Rank 2: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.97859075 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.1225s; samplesPerSecond = 5224.7 -MPI Rank 2: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.85686598 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.1229s; samplesPerSecond = 5205.9 -MPI Rank 2: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.69053367 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.1231s; samplesPerSecond = 5198.5 -MPI Rank 2: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.78653380 * 640; EvalErrorPrediction = 0.70468750 * 640; time = 0.1215s; samplesPerSecond = 5268.7 -MPI Rank 2: 05/03/2016 18:07:34: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.57702533 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1226s; samplesPerSecond = 5219.2 -MPI Rank 2: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.61570798 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.1261s; samplesPerSecond = 5077.2 -MPI Rank 2: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.55235583 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.1260s; samplesPerSecond = 5080.8 -MPI Rank 2: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48211149 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.1225s; samplesPerSecond = 5225.8 -MPI Rank 2: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.38778372 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.1224s; samplesPerSecond = 5226.8 -MPI Rank 2: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.36900907 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1221s; samplesPerSecond = 5240.3 -MPI Rank 2: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.43967781 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1224s; samplesPerSecond = 5227.3 -MPI Rank 2: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.30281044 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.1218s; samplesPerSecond = 5255.6 -MPI Rank 2: 05/03/2016 18:07:35: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.19669152 * 640; EvalErrorPrediction = 0.55937500 * 640; time = 0.1223s; samplesPerSecond = 5232.6 -MPI Rank 2: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.28979576 * 640; EvalErrorPrediction = 0.60468750 * 640; time = 0.1236s; samplesPerSecond = 5176.2 -MPI Rank 2: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.17750535 * 640; EvalErrorPrediction = 0.62187500 * 640; time = 0.1225s; samplesPerSecond = 5226.4 -MPI Rank 2: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.26264396 * 640; EvalErrorPrediction = 0.59687500 * 640; time = 0.1228s; samplesPerSecond = 5212.8 -MPI Rank 2: 05/03/2016 18:07:36: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.15073108 * 640; EvalErrorPrediction = 0.56250000 * 640; time = 0.1231s; samplesPerSecond = 5198.4 -MPI Rank 2: 05/03/2016 18:07:36: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.97995691 * 20480; EvalErrorPrediction = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=4.1463s +MPI Rank 2: 05/03/2016 18:06:49: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.32135292 * 640; EvalErrorPrediction = 0.90000000 * 640; time = 0.1356s; samplesPerSecond = 4720.6 +MPI Rank 2: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15070941 * 640; EvalErrorPrediction = 0.86718750 * 640; time = 0.1255s; samplesPerSecond = 5100.6 +MPI Rank 2: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99901060 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1235s; samplesPerSecond = 5182.3 +MPI Rank 2: 05/03/2016 18:06:49: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86945819 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1245s; samplesPerSecond = 5142.3 +MPI Rank 2: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80219554 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1269s; samplesPerSecond = 5042.6 +MPI Rank 2: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72890764 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1267s; samplesPerSecond = 5052.0 +MPI Rank 2: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.56187066 * 640; EvalErrorPrediction = 0.82343750 * 640; time = 0.1238s; samplesPerSecond = 5169.9 +MPI Rank 2: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42790300 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.1220s; samplesPerSecond = 5245.3 +MPI Rank 2: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.33928335 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1232s; samplesPerSecond = 5195.9 +MPI Rank 2: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36398779 * 640; EvalErrorPrediction = 0.84375000 * 640; time = 0.1252s; samplesPerSecond = 5113.7 +MPI Rank 2: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21223692 * 640; EvalErrorPrediction = 0.75312500 * 640; time = 0.1242s; samplesPerSecond = 5152.0 +MPI Rank 2: 05/03/2016 18:06:50: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31265356 * 640; EvalErrorPrediction = 0.78750000 * 640; time = 0.1241s; samplesPerSecond = 5157.6 +MPI Rank 2: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14081692 * 640; EvalErrorPrediction = 0.74687500 * 640; time = 0.1329s; samplesPerSecond = 4817.4 +MPI Rank 2: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.00690026 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1242s; samplesPerSecond = 5154.8 +MPI Rank 2: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.00496067 * 640; EvalErrorPrediction = 0.72343750 * 640; time = 0.1237s; samplesPerSecond = 5174.0 +MPI Rank 2: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.97859075 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.1237s; samplesPerSecond = 5175.7 +MPI Rank 2: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.85686598 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.1238s; samplesPerSecond = 5168.1 +MPI Rank 2: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.69053367 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.1242s; samplesPerSecond = 5152.5 +MPI Rank 2: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.78653380 * 640; EvalErrorPrediction = 0.70468750 * 640; time = 0.1221s; samplesPerSecond = 5242.8 +MPI Rank 2: 05/03/2016 18:06:51: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.57702533 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1235s; samplesPerSecond = 5183.5 +MPI Rank 2: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.61570798 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.1232s; samplesPerSecond = 5195.1 +MPI Rank 2: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.55235583 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.1250s; samplesPerSecond = 5120.5 +MPI Rank 2: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48211149 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.1239s; samplesPerSecond = 5164.3 +MPI Rank 2: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.38778372 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.1242s; samplesPerSecond = 5154.6 +MPI Rank 2: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.36900907 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.1241s; samplesPerSecond = 5157.0 +MPI Rank 2: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.43967781 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1245s; samplesPerSecond = 5142.2 +MPI Rank 2: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.30281044 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.1237s; samplesPerSecond = 5174.3 +MPI Rank 2: 05/03/2016 18:06:52: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.19669152 * 640; EvalErrorPrediction = 0.55937500 * 640; time = 0.1247s; samplesPerSecond = 5133.0 +MPI Rank 2: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.28979576 * 640; EvalErrorPrediction = 0.60468750 * 640; time = 0.1239s; samplesPerSecond = 5165.6 +MPI Rank 2: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.17750535 * 640; EvalErrorPrediction = 0.62187500 * 640; time = 0.1236s; samplesPerSecond = 5179.0 +MPI Rank 2: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.26264396 * 640; EvalErrorPrediction = 0.59687500 * 640; time = 0.1239s; samplesPerSecond = 5164.7 +MPI Rank 2: 05/03/2016 18:06:53: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.15073108 * 640; EvalErrorPrediction = 0.56250000 * 640; time = 0.1242s; samplesPerSecond = 5150.9 +MPI Rank 2: 05/03/2016 18:06:53: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.97995691 * 20480; EvalErrorPrediction = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=4.00605s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:36: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 18:06:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:36: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 18:07:36: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.01598513 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1430s; samplesPerSecond = 17907.1 -MPI Rank 2: 05/03/2016 18:07:36: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98818589 * 2560; EvalErrorPrediction = 0.54296875 * 2560; time = 0.1356s; samplesPerSecond = 18879.3 -MPI Rank 2: 05/03/2016 18:07:36: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98698117 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1362s; samplesPerSecond = 18797.7 -MPI Rank 2: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93126300 * 2560; EvalErrorPrediction = 0.52773437 * 2560; time = 0.1345s; samplesPerSecond = 19032.6 -MPI Rank 2: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90067741 * 2560; EvalErrorPrediction = 0.52656250 * 2560; time = 0.1345s; samplesPerSecond = 19028.5 -MPI Rank 2: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97115807 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1344s; samplesPerSecond = 19052.6 -MPI Rank 2: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89518062 * 2560; EvalErrorPrediction = 0.52031250 * 2560; time = 0.1340s; samplesPerSecond = 19102.9 -MPI Rank 2: 05/03/2016 18:07:37: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.90450399 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.1358s; samplesPerSecond = 18853.9 -MPI Rank 2: 05/03/2016 18:07:37: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.94924191 * 20480; EvalErrorPrediction = 0.53417969 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.09896s +MPI Rank 2: 05/03/2016 18:06:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 18:06:53: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.01598513 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1441s; samplesPerSecond = 17760.0 +MPI Rank 2: 05/03/2016 18:06:53: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98818589 * 2560; EvalErrorPrediction = 0.54296875 * 2560; time = 0.1363s; samplesPerSecond = 18780.7 +MPI Rank 2: 05/03/2016 18:06:53: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98698117 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1373s; samplesPerSecond = 18642.2 +MPI Rank 2: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93126300 * 2560; EvalErrorPrediction = 0.52773437 * 2560; time = 0.1359s; samplesPerSecond = 18830.6 +MPI Rank 2: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90067741 * 2560; EvalErrorPrediction = 0.52656250 * 2560; time = 0.1352s; samplesPerSecond = 18933.0 +MPI Rank 2: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97115807 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1350s; samplesPerSecond = 18956.1 +MPI Rank 2: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89518062 * 2560; EvalErrorPrediction = 0.52031250 * 2560; time = 0.1352s; samplesPerSecond = 18932.3 +MPI Rank 2: 05/03/2016 18:06:54: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.90450399 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.1362s; samplesPerSecond = 18792.2 +MPI Rank 2: 05/03/2016 18:06:54: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.94924191 * 20480; EvalErrorPrediction = 0.53417969 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.10654s MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:37: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 18:06:54: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:37: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 18:07:37: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359847 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.2056s; samplesPerSecond = 49813.7 -MPI Rank 2: 05/03/2016 18:07:38: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656269 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.2041s; samplesPerSecond = 50178.6 -MPI Rank 2: 05/03/2016 18:07:38: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008058 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.421661s -MPI Rank 2: 05/03/2016 18:07:38: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 18:06:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 18:06:54: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359847 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.2107s; samplesPerSecond = 48604.7 +MPI Rank 2: 05/03/2016 18:06:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656269 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.2102s; samplesPerSecond = 48722.9 +MPI Rank 2: 05/03/2016 18:06:55: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008058 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.43352s +MPI Rank 2: 05/03/2016 18:06:55: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:38: Action "train" complete. +MPI Rank 2: 05/03/2016 18:06:55: Action "train" complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:07:38: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 18:06:55: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.windows.cpu.txt index ca716079b..f1e5047d9 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.windows.cpu.txt @@ -1,4 +1,4 @@ -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr ------------------------------------------------------------------- Build 
info: @@ -57,19 +57,19 @@ ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) +ping [requestnodes (before change)]: all 3 nodes responded +ping [requestnodes (after change)]: 3 nodes pinging each other +ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded ping [requestnodes (after change)]: all 3 nodes responded ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 2 in a gearbox of 3 mpihelper: we are cog 1 in a gearbox of 3 +mpihelper: we are cog 2 in a gearbox of 3 mpihelper: we are cog 0 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: 3 nodes pinging each other @@ -91,32 +91,32 @@ job aborted: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe ended prematurely and may have crashed. exit code 0 ---- error analysis ----- -MPI Rank 0: 05/03/2016 14:22:56: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank0 -MPI Rank 0: 05/03/2016 14:22:56: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 14:22:56: Build info: +MPI Rank 0: 05/03/2016 14:21:56: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank0 +MPI Rank 0: 05/03/2016 14:21:56: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:21:56: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:56: Built time: May 3 2016 13:23:06 -MPI Rank 0: 05/03/2016 14:22:56: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 0: 05/03/2016 14:22:56: Build type: Release -MPI Rank 0: 05/03/2016 14:22:56: Build target: GPU -MPI Rank 0: 05/03/2016 14:22:56: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 14:22:56: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 05/03/2016 14:22:56: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 0: 05/03/2016 14:22:56: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 05/03/2016 14:22:56: Build Branch: HEAD -MPI Rank 0: 05/03/2016 14:22:56: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 0: 05/03/2016 14:22:56: Built by svcphil on LIANA-09-w -MPI Rank 0: 05/03/2016 14:22:56: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 05/03/2016 14:22:56: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:21:56: Built time: May 3 2016 13:23:06 +MPI Rank 0: 05/03/2016 14:21:56: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 0: 05/03/2016 14:21:56: Build type: 
Release +MPI Rank 0: 05/03/2016 14:21:56: Build target: GPU +MPI Rank 0: 05/03/2016 14:21:56: With 1bit-SGD: no +MPI Rank 0: 05/03/2016 14:21:56: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 0: 05/03/2016 14:21:56: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 0: 05/03/2016 14:21:56: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 0: 05/03/2016 14:21:56: Build Branch: HEAD +MPI Rank 0: 05/03/2016 14:21:56: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 0: 05/03/2016 14:21:56: Built by svcphil on LIANA-09-w +MPI Rank 0: 05/03/2016 14:21:56: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 0: 05/03/2016 14:21:56: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:56: Running on DPHAIM-25 at 2016/05/03 14:22:56 -MPI Rank 0: 05/03/2016 14:22:56: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 0: 05/03/2016 14:21:56: Running on DPHAIM-25 at 2016/05/03 14:21:56 +MPI Rank 0: 05/03/2016 14:21:56: Command line: +MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:56: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 14:22:56: precision = "float" +MPI Rank 0: 05/03/2016 14:21:56: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:21:56: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -206,27 +206,25 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 0: 
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:56: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:21:56: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:56: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 14:22:56: precision = "float" +MPI Rank 0: 05/03/2016 14:21:56: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:21:56: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = -1 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -310,33 +308,31 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: 
speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:56: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:21:56: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:56: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:21:56: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain MPI Rank 0: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:deviceId=-1 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=float -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -418,35 +414,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 0: ] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 14:22:56: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 
14:22:56: Commands: speechTrain -MPI Rank 0: 05/03/2016 14:22:56: Precision = "float" -MPI Rank 0: 05/03/2016 14:22:56: Using 8 CPU threads. -MPI Rank 0: 05/03/2016 14:22:56: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 14:22:56: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 0: 05/03/2016 14:22:56: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 0: 05/03/2016 14:21:56: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:21:56: Commands: speechTrain +MPI Rank 0: 05/03/2016 14:21:56: Precision = "float" +MPI Rank 0: 05/03/2016 14:21:56: Using 8 CPU threads. +MPI Rank 0: 05/03/2016 14:21:56: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 14:21:56: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 0: 05/03/2016 14:21:56: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:56: ############################################################################## -MPI Rank 0: 05/03/2016 14:22:56: # # -MPI Rank 0: 05/03/2016 14:22:56: # Action "train" # -MPI Rank 0: 05/03/2016 14:22:56: # # -MPI Rank 0: 05/03/2016 14:22:56: ############################################################################## +MPI Rank 0: 05/03/2016 14:21:56: ############################################################################## +MPI Rank 0: 05/03/2016 14:21:56: # # +MPI Rank 0: 05/03/2016 14:21:56: # Action "train" # +MPI Rank 0: 05/03/2016 14:21:56: # # +MPI Rank 0: 05/03/2016 14:21:56: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:56: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 14:21:56: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:57: Creating virgin network. +MPI Rank 0: 05/03/2016 14:21:56: Creating virgin network. MPI Rank 0: MPI Rank 0: Post-processing network... 
MPI Rank 0: @@ -498,14 +493,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:57: Created model with 25 nodes on CPU. +MPI Rank 0: 05/03/2016 14:21:56: Created model with 25 nodes on CPU. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:57: Training criterion node(s): -MPI Rank 0: 05/03/2016 14:22:57: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 14:21:56: Training criterion node(s): +MPI Rank 0: 05/03/2016 14:21:56: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:57: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 14:21:56: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:57: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 14:21:56: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -513,135 +508,140 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 000000DC5C1A61F0: {[B0 Value[512 x 1]] } -MPI Rank 0: 000000DC5C1A6970: {[features Value[363 x *]] } -MPI Rank 0: 000000DC5C1A6A10: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 000000DC5C1A6B50: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 000000DC5C1A6BF0: {[W0 Value[512 x 363]] } -MPI Rank 0: 000000DC652BB640: {[W1 Value[512 x 512]] } -MPI Rank 0: 000000DC652BB820: {[B2 Value[132 x 1]] } -MPI Rank 0: 000000DC652BBA00: {[labels Value[132 x *]] } -MPI Rank 0: 000000DC652BBB40: {[Prior Value[132]] } -MPI Rank 0: 000000DC652BBD20: {[B1 Value[512 x 1]] } -MPI Rank 0: 000000DC652BBF00: {[W2 Value[132 x 512]] } -MPI Rank 0: 000000DC652BC400: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 000000DC652BC4A0: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 000000DC652BC5E0: {[W0*features Value[512 x *]] } -MPI Rank 0: 000000DC652BC7C0: {[LogOfPrior Value[132]] } -MPI Rank 0: 000000DC652BC860: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 000000DC652BC900: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 000000DC652BCA40: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 000000DC652BCAE0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 000000DC652BCB80: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 000000DC652BCCC0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 000000DC652BCD60: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 000000DC652BCF40: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 000000DC652BD080: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 000000DC6533A420: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 000000DC6533A880: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 000000DC6533AC40: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 000000DC6533B280: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 
000000FB76867E90: {[B0 Value[512 x 1]] } +MPI Rank 0: 000000FB76868110: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 000000FB768684D0: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 000000FB76868570: {[features Value[363 x *]] } +MPI Rank 0: 000000FB76868A70: {[W0 Value[512 x 363]] } +MPI Rank 0: 000000FB7FD13270: {[LogOfPrior Value[132]] } +MPI Rank 0: 000000FB7FD133B0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 000000FB7FD13630: {[B1 Value[512 x 1]] } +MPI Rank 0: 000000FB7FD13770: {[W2 Value[132 x 512]] } +MPI Rank 0: 000000FB7FD13950: {[W1 Value[512 x 512]] } +MPI Rank 0: 000000FB7FD13A90: {[Prior Value[132]] } +MPI Rank 0: 000000FB7FD13C70: {[labels Value[132 x *]] } +MPI Rank 0: 000000FB7FD13E50: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 000000FB7FD13EF0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 000000FB7FD14030: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 000000FB7FD145D0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 000000FB7FD14670: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 000000FB7FD14850: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 000000FB7FD148F0: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 000000FB7FD14990: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 000000FB7FD14A30: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 000000FB7FD14B70: {[B2 Value[132 x 1]] } +MPI Rank 0: 000000FB7FD14D50: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 000000FB7FD14E90: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 000000FB7FD14F30: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 000000FB7FD15110: {[W0*features Value[512 x *]] } +MPI Rank 0: 000000FB7FD86F50: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 000000FB7FD87310: {[B2 Gradient[132 x 1]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:57: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 14:21:56: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:57: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 14:22:57: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 14:22:57: Prior = Mean() +MPI Rank 0: 05/03/2016 14:21:56: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 14:21:56: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 14:21:56: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:58: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 14:21:58: Precomputing --> Completed. 
MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:59: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 14:21:59: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:22:59: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944914 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.3091s; samplesPerSecond = 2070.6 -MPI Rank 0: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299974 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.2831s; samplesPerSecond = 2260.8 -MPI Rank 0: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971317 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.2876s; samplesPerSecond = 2225.3 -MPI Rank 0: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341645 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.2901s; samplesPerSecond = 2206.2 -MPI Rank 0: 05/03/2016 14:23:01: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074398 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.2910s; samplesPerSecond = 2199.3 -MPI Rank 0: 05/03/2016 14:23:01: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252058 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3012s; samplesPerSecond = 2124.7 -MPI Rank 0: 05/03/2016 14:23:01: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563305 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.3068s; samplesPerSecond = 2086.1 -MPI Rank 0: 05/03/2016 14:23:02: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49348925 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.3120s; samplesPerSecond = 2051.2 -MPI Rank 0: 05/03/2016 14:23:02: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34739941 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.3164s; samplesPerSecond = 2023.0 -MPI Rank 0: 05/03/2016 14:23:02: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960765 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3242s; samplesPerSecond = 1974.0 -MPI Rank 0: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24655864 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.2743s; samplesPerSecond = 2333.5 -MPI Rank 0: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397441 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.2729s; samplesPerSecond = 2345.2 -MPI Rank 0: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780763 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.2796s; samplesPerSecond = 2288.9 -MPI Rank 0: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845652 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.2633s; samplesPerSecond = 2430.6 -MPI Rank 0: 05/03/2016 14:23:04: Epoch[ 1 of 
3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458017 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.2578s; samplesPerSecond = 2482.9 -MPI Rank 0: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633300 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.2593s; samplesPerSecond = 2467.7 -MPI Rank 0: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607240 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.2661s; samplesPerSecond = 2404.7 -MPI Rank 0: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74094816 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.2744s; samplesPerSecond = 2332.4 -MPI Rank 0: 05/03/2016 14:23:05: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087650 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.2718s; samplesPerSecond = 2354.4 -MPI Rank 0: 05/03/2016 14:23:05: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67608835 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.2759s; samplesPerSecond = 2319.4 -MPI Rank 0: 05/03/2016 14:23:05: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732625 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.2925s; samplesPerSecond = 2187.7 -MPI Rank 0: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925401 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.2871s; samplesPerSecond = 2229.5 -MPI Rank 0: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388257 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.2962s; samplesPerSecond = 2160.4 -MPI Rank 0: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544367 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.3083s; samplesPerSecond = 2076.2 -MPI Rank 0: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43264910 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3078s; samplesPerSecond = 2079.0 -MPI Rank 0: 05/03/2016 14:23:07: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728478 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3129s; samplesPerSecond = 2045.2 -MPI Rank 0: 05/03/2016 14:23:07: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674570 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.3170s; samplesPerSecond = 2018.9 -MPI Rank 0: 05/03/2016 14:23:07: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020700 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.2896s; samplesPerSecond = 2210.0 -MPI Rank 0: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400392 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.2867s; samplesPerSecond = 2232.4 -MPI Rank 0: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15884952 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.2820s; samplesPerSecond = 2269.7 -MPI Rank 0: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712615 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.2841s; samplesPerSecond = 2252.9 -MPI Rank 0: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 
2.25604560 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.2517s; samplesPerSecond = 2542.4 -MPI Rank 0: 05/03/2016 14:23:08: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704645 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=9.34079s -MPI Rank 0: 05/03/2016 14:23:09: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 14:21:59: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 14:22:00: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944914 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.9048s; samplesPerSecond = 707.4 +MPI Rank 0: 05/03/2016 14:22:01: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299974 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.9849s; samplesPerSecond = 649.8 +MPI Rank 0: 05/03/2016 14:22:02: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971317 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 1.0696s; samplesPerSecond = 598.4 +MPI Rank 0: 05/03/2016 14:22:03: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341645 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 1.1540s; samplesPerSecond = 554.6 +MPI Rank 0: 05/03/2016 14:22:04: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074398 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.9880s; samplesPerSecond = 647.8 +MPI Rank 0: 05/03/2016 14:22:05: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252058 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 1.1191s; samplesPerSecond = 571.9 +MPI Rank 0: 05/03/2016 14:22:06: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563305 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 1.1283s; samplesPerSecond = 567.2 +MPI Rank 0: 05/03/2016 14:22:07: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49348925 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.9280s; samplesPerSecond = 689.6 +MPI Rank 0: 05/03/2016 14:22:08: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34739941 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 1.1305s; samplesPerSecond = 566.1 +MPI Rank 0: 05/03/2016 14:22:09: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960765 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 1.1184s; samplesPerSecond = 572.2 +MPI Rank 0: 05/03/2016 14:22:10: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24655864 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.9809s; samplesPerSecond = 652.4 +MPI Rank 0: 05/03/2016 14:22:11: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397441 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 1.0724s; samplesPerSecond = 596.8 +MPI Rank 0: 05/03/2016 14:22:12: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780763 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.9973s; samplesPerSecond = 641.7 +MPI Rank 0: 05/03/2016 14:22:13: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845652 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 1.0616s; 
samplesPerSecond = 602.9 +MPI Rank 0: 05/03/2016 14:22:14: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458017 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 1.1026s; samplesPerSecond = 580.5 +MPI Rank 0: 05/03/2016 14:22:15: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633300 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 1.0101s; samplesPerSecond = 633.6 +MPI Rank 0: 05/03/2016 14:22:17: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607240 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 1.1249s; samplesPerSecond = 568.9 +MPI Rank 0: 05/03/2016 14:22:18: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74094816 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 1.0050s; samplesPerSecond = 636.8 +MPI Rank 0: 05/03/2016 14:22:19: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087650 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 1.0629s; samplesPerSecond = 602.1 +MPI Rank 0: 05/03/2016 14:22:20: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67608835 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 1.2601s; samplesPerSecond = 507.9 +MPI Rank 0: 05/03/2016 14:22:21: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732625 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.8915s; samplesPerSecond = 717.9 +MPI Rank 0: 05/03/2016 14:22:22: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925401 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 1.0340s; samplesPerSecond = 618.9 +MPI Rank 0: 05/03/2016 14:22:23: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388257 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 1.1389s; samplesPerSecond = 561.9 +MPI Rank 0: 05/03/2016 14:22:24: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544367 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 1.0311s; samplesPerSecond = 620.7 +MPI Rank 0: 05/03/2016 14:22:25: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43264910 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 1.1241s; samplesPerSecond = 569.4 +MPI Rank 0: 05/03/2016 14:22:26: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728478 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 1.0450s; samplesPerSecond = 612.4 +MPI Rank 0: 05/03/2016 14:22:27: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674570 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 1.0381s; samplesPerSecond = 616.5 +MPI Rank 0: 05/03/2016 14:22:28: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020700 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 1.0684s; samplesPerSecond = 599.0 +MPI Rank 0: 05/03/2016 14:22:29: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400392 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 1.0702s; samplesPerSecond = 598.0 +MPI Rank 0: 05/03/2016 14:22:31: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15884952 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 1.1756s; samplesPerSecond = 544.4 +MPI Rank 0: 05/03/2016 14:22:32: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712615 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 1.0127s; samplesPerSecond = 632.0 +MPI Rank 0: 05/03/2016 14:22:32: Epoch[ 1 of 3]-Minibatch[ 
311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604560 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.9480s; samplesPerSecond = 675.1 +MPI Rank 0: 05/03/2016 14:22:33: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704645 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=33.9013s +MPI Rank 0: 05/03/2016 14:22:33: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 14:22:33: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 05/03/2016 14:23:09: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10257504 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.4107s; samplesPerSecond = 6233.7 -MPI Rank 0: 05/03/2016 14:23:09: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00548602 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.3954s; samplesPerSecond = 6474.3 -MPI Rank 0: 05/03/2016 14:23:10: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00766993 * 2560; EvalErrorPrediction = 0.54960937 * 2560; time = 0.3999s; samplesPerSecond = 6402.2 -MPI Rank 0: 05/03/2016 14:23:10: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.92049433 * 2560; EvalErrorPrediction = 0.53281250 * 2560; time = 0.4055s; samplesPerSecond = 6313.7 -MPI Rank 0: 05/03/2016 14:23:11: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90178368 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.4079s; samplesPerSecond = 6276.4 -MPI Rank 0: 05/03/2016 14:23:11: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91359460 * 2560; EvalErrorPrediction = 0.53984375 * 2560; time = 0.4166s; samplesPerSecond = 6145.7 -MPI Rank 0: 05/03/2016 14:23:11: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91765117 * 2560; EvalErrorPrediction = 0.53125000 * 2560; time = 0.4195s; samplesPerSecond = 6102.2 -MPI Rank 0: 05/03/2016 14:23:12: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.87682822 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.4237s; samplesPerSecond = 6041.7 -MPI Rank 0: 05/03/2016 14:23:12: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.95576037 * 20480; EvalErrorPrediction = 0.53979492 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=3.30784s -MPI Rank 0: 05/03/2016 14:23:12: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn.2' +MPI Rank 0: 05/03/2016 14:22:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 0: 05/03/2016 14:22:34: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10257504 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 1.3149s; samplesPerSecond = 1946.9 +MPI Rank 0: 05/03/2016 14:22:35: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00548602 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 1.1678s; samplesPerSecond = 2192.2 +MPI Rank 0: 05/03/2016 14:22:36: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00766993 * 2560; EvalErrorPrediction = 0.54960937 * 2560; time = 1.2047s; samplesPerSecond = 2124.9 +MPI Rank 0: 05/03/2016 14:22:38: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.92049433 * 2560; EvalErrorPrediction = 0.53281250 * 2560; time = 1.2674s; samplesPerSecond = 2019.8 +MPI Rank 0: 05/03/2016 14:22:39: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90178368 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 1.0578s; samplesPerSecond = 2420.1 +MPI Rank 0: 05/03/2016 14:22:40: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91359460 * 2560; EvalErrorPrediction = 0.53984375 * 2560; time = 1.1674s; samplesPerSecond = 2193.0 +MPI Rank 0: 05/03/2016 14:22:41: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91765117 * 2560; EvalErrorPrediction = 0.53125000 * 2560; time = 1.2951s; samplesPerSecond = 1976.7 +MPI Rank 0: 05/03/2016 14:22:42: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.87682822 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 1.1899s; samplesPerSecond = 2151.5 +MPI Rank 0: 05/03/2016 14:22:42: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.95576037 * 20480; EvalErrorPrediction = 0.53979492 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=9.75712s +MPI Rank 0: 05/03/2016 14:22:42: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:12: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:22:42: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:12: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 14:23:13: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593946 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 0.9897s; samplesPerSecond = 10346.4 -MPI Rank 0: 05/03/2016 14:23:14: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384561 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 0.9753s; samplesPerSecond = 10499.2 -MPI Rank 0: 05/03/2016 14:23:14: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989253 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.99387s -MPI Rank 0: 05/03/2016 14:23:14: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 14:23:14: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 14:22:42: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 14:22:44: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593946 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 1.7740s; samplesPerSecond = 5772.4 +MPI Rank 0: 05/03/2016 14:22:46: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384561 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 1.7771s; samplesPerSecond = 5762.1 +MPI Rank 0: 05/03/2016 14:22:46: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989253 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.64439s +MPI Rank 0: 05/03/2016 14:22:46: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 14:22:46: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:14: Action "train" complete. +MPI Rank 0: 05/03/2016 14:22:46: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:14: __COMPLETED__ -MPI Rank 1: 05/03/2016 14:22:57: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank1 -MPI Rank 1: 05/03/2016 14:22:57: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 14:22:57: Build info: +MPI Rank 0: 05/03/2016 14:22:46: __COMPLETED__ +MPI Rank 1: 05/03/2016 14:21:56: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank1 +MPI Rank 1: 05/03/2016 14:21:56: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:21:57: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: Built time: May 3 2016 13:23:06 -MPI Rank 1: 05/03/2016 14:22:57: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 1: 05/03/2016 14:22:57: Build type: Release -MPI Rank 1: 05/03/2016 14:22:57: Build target: GPU -MPI Rank 1: 05/03/2016 14:22:57: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 14:22:57: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 05/03/2016 14:22:57: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 1: 05/03/2016 14:22:57: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 05/03/2016 14:22:57: Build Branch: HEAD -MPI Rank 1: 05/03/2016 14:22:57: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 1: 05/03/2016 14:22:57: Built by svcphil on LIANA-09-w -MPI Rank 1: 05/03/2016 14:22:57: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 05/03/2016 14:22:57: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:21:57: Built time: May 3 2016 13:23:06 +MPI Rank 1: 05/03/2016 14:21:57: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 1: 05/03/2016 14:21:57: Build type: Release +MPI Rank 1: 05/03/2016 14:21:57: Build target: GPU +MPI Rank 1: 05/03/2016 14:21:57: With 1bit-SGD: no +MPI Rank 1: 05/03/2016 14:21:57: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 1: 05/03/2016 14:21:57: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 1: 05/03/2016 14:21:57: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 1: 05/03/2016 14:21:57: Build Branch: HEAD +MPI Rank 1: 05/03/2016 14:21:57: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 1: 05/03/2016 14:21:57: Built by svcphil on LIANA-09-w +MPI Rank 1: 05/03/2016 14:21:57: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 1: 05/03/2016 14:21:57: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: Running on DPHAIM-25 at 2016/05/03 14:22:57 -MPI Rank 1: 05/03/2016 14:22:57: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 1: 05/03/2016 14:21:57: Running on DPHAIM-25 at 2016/05/03 14:21:57 +MPI Rank 1: 05/03/2016 14:21:57: Command line: +MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 14:22:57: precision = "float" +MPI Rank 1: 05/03/2016 14:21:57: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:21:57: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -731,27 +731,25 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:21:57: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT 
RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 14:22:57: precision = "float" +MPI Rank 1: 05/03/2016 14:21:57: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:21:57: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = -1 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -835,33 +833,31 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:21:57: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:21:57: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:deviceId=-1 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 1: configparameters: 
cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=float -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -943,35 +939,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 1: ] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 14:22:57: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 14:22:57: Commands: speechTrain -MPI Rank 1: 05/03/2016 14:22:57: Precision = "float" -MPI Rank 1: 05/03/2016 14:22:57: Using 8 CPU threads. -MPI Rank 1: 05/03/2016 14:22:57: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 14:22:57: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 1: 05/03/2016 14:22:57: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 1: 05/03/2016 14:21:57: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:21:57: Commands: speechTrain +MPI Rank 1: 05/03/2016 14:21:57: Precision = "float" +MPI Rank 1: 05/03/2016 14:21:57: Using 8 CPU threads. 
+MPI Rank 1: 05/03/2016 14:21:57: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 14:21:57: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 1: 05/03/2016 14:21:57: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: ############################################################################## -MPI Rank 1: 05/03/2016 14:22:57: # # -MPI Rank 1: 05/03/2016 14:22:57: # Action "train" # -MPI Rank 1: 05/03/2016 14:22:57: # # -MPI Rank 1: 05/03/2016 14:22:57: ############################################################################## +MPI Rank 1: 05/03/2016 14:21:57: ############################################################################## +MPI Rank 1: 05/03/2016 14:21:57: # # +MPI Rank 1: 05/03/2016 14:21:57: # Action "train" # +MPI Rank 1: 05/03/2016 14:21:57: # # +MPI Rank 1: 05/03/2016 14:21:57: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 14:21:57: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: Creating virgin network. +MPI Rank 1: 05/03/2016 14:21:57: Creating virgin network. MPI Rank 1: MPI Rank 1: Post-processing network... MPI Rank 1: @@ -1023,14 +1018,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: Created model with 25 nodes on CPU. +MPI Rank 1: 05/03/2016 14:21:57: Created model with 25 nodes on CPU. 
MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: Training criterion node(s): -MPI Rank 1: 05/03/2016 14:22:57: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 14:21:57: Training criterion node(s): +MPI Rank 1: 05/03/2016 14:21:57: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 14:21:57: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 14:21:57: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1038,132 +1033,137 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 00000082B5FD5ED0: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 00000082B5FD5F70: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 00000082B5FD61F0: {[W0 Value[512 x 363]] } -MPI Rank 1: 00000082B5FD6470: {[B0 Value[512 x 1]] } -MPI Rank 1: 00000082B5FD6AB0: {[features Value[363 x *]] } -MPI Rank 1: 00000082B602C890: {[LogOfPrior Value[132]] } -MPI Rank 1: 00000082B602C930: {[W0*features Value[512 x *]] } -MPI Rank 1: 00000082B602C9D0: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 00000082B602CA70: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 00000082B602CBB0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 00000082B602CD90: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 00000082B602CE30: {[labels Value[132 x *]] } -MPI Rank 1: 00000082B602CED0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 00000082B602CF70: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 00000082B602D0B0: {[B2 Value[132 x 1]] } -MPI Rank 1: 00000082B602D290: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 00000082B602D330: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 00000082B602D3D0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 00000082B602D970: {[W2 Value[132 x 512]] } -MPI Rank 1: 00000082B602DAB0: {[W1 Value[512 x 512]] } -MPI Rank 1: 00000082B602DC90: {[B1 Value[512 x 1]] } -MPI Rank 1: 00000082B602DFB0: {[Prior Value[132]] } -MPI Rank 1: 00000082B602E230: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 00000082B602E4B0: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 00000082BF3BD4F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 00000082BF3BDA90: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 00000082BF3BE3F0: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 00000082BF3BEA30: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 000000B8A1E77C10: {[features Value[363 x *]] } +MPI Rank 1: 000000B8A1E77CB0: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 000000B8A1E77E90: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 000000B8A1E78250: {[W0 Value[512 x 363]] } +MPI Rank 1: 000000B8A1E78430: {[B0 Value[512 x 1]] } +MPI Rank 1: 000000B8AB5020F0: {[W1 Value[512 x 
512]] } +MPI Rank 1: 000000B8AB5022D0: {[Prior Value[132]] } +MPI Rank 1: 000000B8AB5024B0: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 000000B8AB5027D0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 000000B8AB502910: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 000000B8AB502C30: {[W2 Value[132 x 512]] } +MPI Rank 1: 000000B8AB502CD0: {[B1 Value[512 x 1]] } +MPI Rank 1: 000000B8AB502F50: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 000000B8AB502FF0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 000000B8AB503130: {[B2 Value[132 x 1]] } +MPI Rank 1: 000000B8AB5033B0: {[LogOfPrior Value[132]] } +MPI Rank 1: 000000B8AB503770: {[W0*features Value[512 x *]] } +MPI Rank 1: 000000B8AB5038B0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 000000B8AB503950: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 000000B8AB503BD0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 000000B8AB503C70: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 000000B8AB503D10: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 000000B8AB503DB0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 000000B8AB503E50: {[labels Value[132 x *]] } +MPI Rank 1: 000000B8AB503EF0: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 000000B8AB503F90: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 000000B8AB5939F0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 000000B8AB594170: {[B2 Gradient[132 x 1]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 14:21:57: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:57: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 14:22:57: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 14:22:57: Prior = Mean() +MPI Rank 1: 05/03/2016 14:21:57: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 14:21:57: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 14:21:57: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:58: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 14:21:58: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:59: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 14:21:59: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:22:59: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 1: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944914 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.3290s; samplesPerSecond = 1945.5 -MPI Rank 1: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299974 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.2831s; samplesPerSecond = 2261.0 -MPI Rank 1: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971317 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.2875s; samplesPerSecond = 2225.7 -MPI Rank 1: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341645 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.2900s; samplesPerSecond = 2206.9 -MPI Rank 1: 05/03/2016 14:23:01: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074398 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.2909s; samplesPerSecond = 2199.7 -MPI Rank 1: 05/03/2016 14:23:01: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252058 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3012s; samplesPerSecond = 2125.1 -MPI Rank 1: 05/03/2016 14:23:01: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563305 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.3067s; samplesPerSecond = 2086.5 -MPI Rank 1: 05/03/2016 14:23:02: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49348925 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.3120s; samplesPerSecond = 2051.5 -MPI Rank 1: 05/03/2016 14:23:02: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34739941 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.3182s; samplesPerSecond = 2011.6 -MPI Rank 1: 05/03/2016 14:23:02: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960765 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3223s; samplesPerSecond = 1985.5 -MPI Rank 1: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24655864 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.2743s; samplesPerSecond = 2333.3 -MPI Rank 1: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397441 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.2729s; samplesPerSecond = 2345.4 -MPI Rank 1: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780763 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.2814s; samplesPerSecond = 2274.7 -MPI Rank 1: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845652 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.2615s; samplesPerSecond = 2447.9 -MPI Rank 1: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458017 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.2577s; samplesPerSecond = 2483.3 -MPI Rank 1: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633300 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.2593s; samplesPerSecond = 2467.9 -MPI Rank 1: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607240 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.2661s; samplesPerSecond = 2405.2 -MPI Rank 1: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74094816 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.2744s; samplesPerSecond = 2332.5 -MPI Rank 1: 05/03/2016 14:23:05: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087650 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.2714s; samplesPerSecond = 2358.1 -MPI Rank 1: 05/03/2016 14:23:05: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67608835 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.2781s; samplesPerSecond = 2301.6 -MPI Rank 1: 05/03/2016 14:23:05: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732625 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.2907s; samplesPerSecond = 2201.6 -MPI Rank 1: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925401 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.2871s; samplesPerSecond = 2229.5 -MPI Rank 1: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388257 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.2962s; samplesPerSecond = 2160.9 -MPI Rank 1: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544367 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.3083s; samplesPerSecond = 2076.0 -MPI Rank 1: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43264910 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3078s; samplesPerSecond = 2079.3 -MPI Rank 1: 05/03/2016 14:23:07: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728478 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3129s; samplesPerSecond = 2045.6 -MPI Rank 1: 05/03/2016 14:23:07: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674570 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.3167s; samplesPerSecond = 2020.9 -MPI Rank 1: 05/03/2016 14:23:07: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020700 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.2898s; samplesPerSecond = 2208.1 -MPI Rank 1: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400392 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.2866s; samplesPerSecond = 2233.2 -MPI Rank 1: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15884952 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.2819s; samplesPerSecond = 2270.1 -MPI Rank 1: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712615 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.2840s; samplesPerSecond = 2253.2 -MPI Rank 1: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604560 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.2501s; samplesPerSecond = 2559.5 -MPI Rank 1: 05/03/2016 14:23:08: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704645 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=9.34079s +MPI Rank 1: 05/03/2016 14:21:59: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 14:22:00: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944914 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.9036s; samplesPerSecond = 708.2 +MPI Rank 1: 05/03/2016 14:22:01: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299974 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.9882s; samplesPerSecond = 647.6 +MPI Rank 1: 05/03/2016 14:22:02: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971317 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 1.0697s; samplesPerSecond = 598.3 +MPI Rank 1: 05/03/2016 14:22:03: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341645 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 1.1541s; samplesPerSecond = 554.6 +MPI Rank 1: 05/03/2016 14:22:04: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074398 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.9880s; samplesPerSecond = 647.8 +MPI Rank 1: 05/03/2016 14:22:05: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252058 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 1.1192s; samplesPerSecond = 571.8 +MPI Rank 1: 05/03/2016 14:22:06: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563305 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 1.1282s; samplesPerSecond = 567.3 +MPI Rank 1: 05/03/2016 14:22:07: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49348925 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.9279s; samplesPerSecond = 689.7 +MPI Rank 1: 05/03/2016 14:22:08: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34739941 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 1.1303s; samplesPerSecond = 566.2 +MPI Rank 1: 05/03/2016 14:22:09: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960765 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 1.1188s; samplesPerSecond = 572.0 +MPI Rank 1: 05/03/2016 14:22:10: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24655864 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.9810s; samplesPerSecond = 652.4 +MPI Rank 1: 05/03/2016 14:22:11: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397441 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 1.0724s; samplesPerSecond = 596.8 +MPI Rank 1: 05/03/2016 14:22:12: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780763 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.9940s; samplesPerSecond = 643.9 +MPI Rank 1: 05/03/2016 14:22:13: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845652 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 1.0649s; samplesPerSecond = 601.0 +MPI Rank 1: 05/03/2016 14:22:14: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458017 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 1.1027s; samplesPerSecond = 580.4 +MPI Rank 1: 05/03/2016 14:22:15: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633300 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 1.0101s; samplesPerSecond = 633.6 +MPI Rank 1: 05/03/2016 14:22:17: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607240 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 1.1248s; samplesPerSecond = 569.0 +MPI Rank 1: 05/03/2016 14:22:18: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74094816 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 1.0051s; samplesPerSecond = 636.8 +MPI Rank 1: 05/03/2016 14:22:19: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087650 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 1.0597s; samplesPerSecond = 604.0 +MPI Rank 1: 05/03/2016 14:22:20: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67608835 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 1.2601s; samplesPerSecond = 507.9 +MPI Rank 1: 05/03/2016 14:22:21: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732625 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.8949s; samplesPerSecond = 715.2 +MPI Rank 1: 05/03/2016 14:22:22: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925401 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 1.0340s; samplesPerSecond = 618.9 +MPI Rank 1: 05/03/2016 14:22:23: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388257 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 1.1359s; samplesPerSecond = 563.5 +MPI Rank 1: 05/03/2016 14:22:24: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544367 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 1.0340s; samplesPerSecond = 618.9 +MPI Rank 1: 05/03/2016 14:22:25: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43264910 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 1.1209s; samplesPerSecond = 571.0 +MPI Rank 1: 05/03/2016 14:22:26: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728478 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 1.0482s; samplesPerSecond = 610.6 +MPI Rank 1: 05/03/2016 14:22:27: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674570 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 1.0348s; samplesPerSecond = 618.5 +MPI Rank 1: 05/03/2016 14:22:28: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020700 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 1.0718s; samplesPerSecond = 597.1 +MPI Rank 1: 05/03/2016 14:22:29: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400392 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 1.0702s; samplesPerSecond = 598.0 +MPI Rank 1: 05/03/2016 14:22:31: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15884952 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 1.1757s; samplesPerSecond = 544.4 +MPI Rank 1: 05/03/2016 14:22:32: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712615 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 1.0127s; samplesPerSecond = 632.0 +MPI Rank 1: 05/03/2016 14:22:32: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604560 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.9500s; samplesPerSecond = 673.7 +MPI Rank 1: 05/03/2016 14:22:33: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704645 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=33.9031s MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 14:22:33: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 
minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 14:23:09: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10257504 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.4088s; samplesPerSecond = 6262.4 -MPI Rank 1: 05/03/2016 14:23:09: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00548602 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.3954s; samplesPerSecond = 6474.5 -MPI Rank 1: 05/03/2016 14:23:10: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00766993 * 2560; EvalErrorPrediction = 0.54960937 * 2560; time = 0.3998s; samplesPerSecond = 6402.8 -MPI Rank 1: 05/03/2016 14:23:10: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.92049433 * 2560; EvalErrorPrediction = 0.53281250 * 2560; time = 0.4055s; samplesPerSecond = 6313.7 -MPI Rank 1: 05/03/2016 14:23:11: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90178368 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.4079s; samplesPerSecond = 6276.8 -MPI Rank 1: 05/03/2016 14:23:11: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91359460 * 2560; EvalErrorPrediction = 0.53984375 * 2560; time = 0.4165s; samplesPerSecond = 6146.4 -MPI Rank 1: 05/03/2016 14:23:11: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91765117 * 2560; EvalErrorPrediction = 0.53125000 * 2560; time = 0.4195s; samplesPerSecond = 6102.4 -MPI Rank 1: 05/03/2016 14:23:12: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.87682822 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.4214s; samplesPerSecond = 6075.3 -MPI Rank 1: 05/03/2016 14:23:12: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.95576037 * 20480; EvalErrorPrediction = 0.53979492 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=3.30784s +MPI Rank 1: 05/03/2016 14:22:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 14:22:34: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10257504 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 1.3177s; samplesPerSecond = 1942.8 +MPI Rank 1: 05/03/2016 14:22:35: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00548602 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 1.1715s; samplesPerSecond = 2185.3 +MPI Rank 1: 05/03/2016 14:22:36: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00766993 * 2560; EvalErrorPrediction = 0.54960937 * 2560; time = 1.2048s; samplesPerSecond = 2124.8 +MPI Rank 1: 05/03/2016 14:22:38: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.92049433 * 2560; EvalErrorPrediction = 0.53281250 * 2560; time = 1.2674s; samplesPerSecond = 2019.9 +MPI Rank 1: 05/03/2016 14:22:39: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90178368 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 1.0579s; samplesPerSecond = 2419.8 +MPI Rank 1: 05/03/2016 14:22:40: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91359460 * 2560; EvalErrorPrediction = 0.53984375 * 2560; time = 1.1673s; samplesPerSecond = 2193.1 +MPI Rank 1: 05/03/2016 14:22:41: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91765117 * 2560; EvalErrorPrediction = 0.53125000 * 2560; time = 1.2946s; samplesPerSecond = 1977.4 +MPI Rank 1: 05/03/2016 14:22:42: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.87682822 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 1.1913s; samplesPerSecond = 2148.9 +MPI Rank 1: 05/03/2016 14:22:42: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.95576037 * 20480; EvalErrorPrediction = 0.53979492 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=9.76329s MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:12: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 14:22:42: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:12: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 14:23:13: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593946 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 0.9896s; samplesPerSecond = 10347.2 -MPI Rank 1: 05/03/2016 14:23:14: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384561 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 0.9709s; samplesPerSecond = 10547.1 -MPI Rank 1: 05/03/2016 14:23:14: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989253 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.99387s -MPI Rank 1: 05/03/2016 14:23:14: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 14:22:42: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 14:22:44: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593946 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 1.7876s; samplesPerSecond = 5728.5 +MPI Rank 1: 05/03/2016 14:22:46: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384561 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 1.7707s; samplesPerSecond = 5783.1 +MPI Rank 1: 05/03/2016 14:22:46: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989253 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.65489s +MPI Rank 1: 05/03/2016 14:22:46: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:14: Action "train" complete. +MPI Rank 1: 05/03/2016 14:22:46: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:14: __COMPLETED__ -MPI Rank 2: 05/03/2016 14:22:57: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank2 -MPI Rank 2: 05/03/2016 14:22:57: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 14:22:57: Build info: +MPI Rank 1: 05/03/2016 14:22:46: __COMPLETED__ +MPI Rank 2: 05/03/2016 14:21:57: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank2 +MPI Rank 2: 05/03/2016 14:21:57: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:21:57: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:57: Built time: May 3 2016 13:23:06 -MPI Rank 2: 05/03/2016 14:22:57: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 2: 05/03/2016 14:22:57: Build type: Release -MPI Rank 2: 05/03/2016 14:22:57: Build target: GPU -MPI Rank 2: 05/03/2016 14:22:57: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 14:22:57: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 05/03/2016 14:22:57: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 2: 05/03/2016 14:22:57: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 05/03/2016 14:22:57: Build Branch: HEAD -MPI Rank 2: 05/03/2016 14:22:57: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 2: 05/03/2016 14:22:57: Built by svcphil on LIANA-09-w -MPI Rank 2: 05/03/2016 14:22:57: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 05/03/2016 14:22:57: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:21:57: Built time: May 3 2016 13:23:06 +MPI Rank 2: 05/03/2016 14:21:57: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 2: 05/03/2016 14:21:57: Build type: Release +MPI Rank 2: 05/03/2016 14:21:57: Build target: GPU +MPI Rank 2: 05/03/2016 14:21:57: With 1bit-SGD: no +MPI Rank 2: 05/03/2016 14:21:57: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 2: 05/03/2016 14:21:57: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 2: 05/03/2016 14:21:57: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 2: 05/03/2016 14:21:57: Build Branch: HEAD +MPI Rank 2: 05/03/2016 14:21:57: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 2: 05/03/2016 14:21:57: Built by svcphil on LIANA-09-w +MPI Rank 2: 05/03/2016 14:21:57: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 2: 05/03/2016 14:21:57: 
------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:57: Running on DPHAIM-25 at 2016/05/03 14:22:57 -MPI Rank 2: 05/03/2016 14:22:57: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 2: 05/03/2016 14:21:57: Running on DPHAIM-25 at 2016/05/03 14:21:57 +MPI Rank 2: 05/03/2016 14:21:57: Command line: +MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:57: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 14:22:57: precision = "float" +MPI Rank 2: 05/03/2016 14:21:57: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:21:57: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1253,27 +1253,25 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:57: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:21:57: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:57: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 14:22:57: precision = "float" +MPI Rank 2: 05/03/2016 14:21:57: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:21:57: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = -1 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1357,33 +1355,31 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:57: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:21:57: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 
14:22:57: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:21:57: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:deviceId=-1 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=float -MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1465,35 +1461,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 2: ] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 14:22:57: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 14:22:57: Commands: speechTrain -MPI Rank 2: 05/03/2016 14:22:57: Precision = "float" -MPI Rank 2: 05/03/2016 14:22:57: Using 8 CPU threads. 
-MPI Rank 2: 05/03/2016 14:22:57: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 14:22:57: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 2: 05/03/2016 14:22:57: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 2: 05/03/2016 14:21:57: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:21:57: Commands: speechTrain +MPI Rank 2: 05/03/2016 14:21:57: Precision = "float" +MPI Rank 2: 05/03/2016 14:21:57: Using 8 CPU threads. +MPI Rank 2: 05/03/2016 14:21:57: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 14:21:57: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 2: 05/03/2016 14:21:57: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:57: ############################################################################## -MPI Rank 2: 05/03/2016 14:22:57: # # -MPI Rank 2: 05/03/2016 14:22:57: # Action "train" # -MPI Rank 2: 05/03/2016 14:22:57: # # -MPI Rank 2: 05/03/2016 14:22:57: ############################################################################## +MPI Rank 2: 05/03/2016 14:21:57: ############################################################################## +MPI Rank 2: 05/03/2016 14:21:57: # # +MPI Rank 2: 05/03/2016 14:21:57: # Action "train" # +MPI Rank 2: 05/03/2016 14:21:57: # # +MPI Rank 2: 05/03/2016 14:21:57: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:57: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 14:21:57: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:58: Creating virgin network. +MPI Rank 2: 05/03/2016 14:21:58: Creating virgin network. MPI Rank 2: MPI Rank 2: Post-processing network... MPI Rank 2: @@ -1545,14 +1540,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:58: Created model with 25 nodes on CPU. 
+MPI Rank 2: 05/03/2016 14:21:58: Created model with 25 nodes on CPU. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:58: Training criterion node(s): -MPI Rank 2: 05/03/2016 14:22:58: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 14:21:58: Training criterion node(s): +MPI Rank 2: 05/03/2016 14:21:58: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:58: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 14:21:58: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:58: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 14:21:58: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1560,103 +1555,108 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 000000B4531A6150: {[W0 Value[512 x 363]] } -MPI Rank 2: 000000B4531A61F0: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 000000B4531A65B0: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 000000B4531A6790: {[features Value[363 x *]] } -MPI Rank 2: 000000B4531A6970: {[B0 Value[512 x 1]] } -MPI Rank 2: 000000B4531FC890: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 000000B4531FD650: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 000000B4531FD6F0: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 000000B4531FDB50: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 000000B45C4F40F0: {[B1 Value[512 x 1]] } -MPI Rank 2: 000000B45C4F4550: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 000000B45C4F45F0: {[W0*features Value[512 x *]] } -MPI Rank 2: 000000B45C4F4690: {[W2 Value[132 x 512]] } -MPI Rank 2: 000000B45C4F4730: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 000000B45C4F47D0: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 000000B45C4F4870: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 000000B45C4F4910: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 000000B45C4F49B0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 000000B45C4F4CD0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 000000B45C4F4D70: {[labels Value[132 x *]] } -MPI Rank 2: 000000B45C4F4E10: {[W1 Value[512 x 512]] } -MPI Rank 2: 000000B45C4F5270: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 000000B45C4F54F0: {[Prior Value[132]] } -MPI Rank 2: 000000B45C4F5810: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 000000B45C4F5A90: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 000000B45C4F5BD0: {[B2 Value[132 x 1]] } -MPI Rank 2: 000000B45C4F5D10: {[LogOfPrior Value[132]] } -MPI Rank 2: 000000B45C4F5EF0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 000000A647836CF0: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 000000A647836F70: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 000000A6478371F0: {[features Value[363 x *]] } +MPI Rank 2: 000000A647837650: {[W0 Value[512 x 363]] } +MPI Rank 2: 000000A647837AB0: {[B0 
Value[512 x 1]] } +MPI Rank 2: 000000A650C588B0: {[W1 Value[512 x 512]] } +MPI Rank 2: 000000A650C589F0: {[B1 Value[512 x 1]] } +MPI Rank 2: 000000A650C58DB0: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 000000A650C58E50: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 000000A650C58EF0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 000000A650C58F90: {[W0*features Value[512 x *]] } +MPI Rank 2: 000000A650C590D0: {[LogOfPrior Value[132]] } +MPI Rank 2: 000000A650C59170: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 000000A650C59210: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 000000A650C592B0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 000000A650C59350: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 000000A650C59530: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 000000A650C595D0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 000000A650C59D50: {[B2 Value[132 x 1]] } +MPI Rank 2: 000000A650C59F30: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 000000A650C5A070: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 000000A650C5A1B0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 000000A650C5A250: {[labels Value[132 x *]] } +MPI Rank 2: 000000A650C5A2F0: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 000000A650C5A390: {[W2 Value[132 x 512]] } +MPI Rank 2: 000000A650C5A430: {[Prior Value[132]] } +MPI Rank 2: 000000A650D7AB40: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 000000A650D7C4E0: {[W2*H1 Gradient[132 x 1 x *]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:58: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 14:21:58: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:58: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 14:22:58: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 14:22:58: Prior = Mean() +MPI Rank 2: 05/03/2016 14:21:58: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 14:21:58: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 14:21:58: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:59: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 14:21:59: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:59: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 14:21:59: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:22:59: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 2: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944914 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.3073s; samplesPerSecond = 2082.3 -MPI Rank 2: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299974 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.2831s; samplesPerSecond = 2260.8 -MPI Rank 2: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971317 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.2876s; samplesPerSecond = 2225.5 -MPI Rank 2: 05/03/2016 14:23:00: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341645 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.2900s; samplesPerSecond = 2206.6 -MPI Rank 2: 05/03/2016 14:23:01: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074398 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.2910s; samplesPerSecond = 2199.6 -MPI Rank 2: 05/03/2016 14:23:01: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252058 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3012s; samplesPerSecond = 2125.0 -MPI Rank 2: 05/03/2016 14:23:01: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563305 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.3068s; samplesPerSecond = 2086.2 -MPI Rank 2: 05/03/2016 14:23:02: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49348925 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.3120s; samplesPerSecond = 2051.2 -MPI Rank 2: 05/03/2016 14:23:02: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34739941 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.3200s; samplesPerSecond = 2000.2 -MPI Rank 2: 05/03/2016 14:23:02: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960765 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3206s; samplesPerSecond = 1996.5 -MPI Rank 2: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24655864 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.2743s; samplesPerSecond = 2332.9 -MPI Rank 2: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397441 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.2729s; samplesPerSecond = 2345.1 -MPI Rank 2: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780763 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.2835s; samplesPerSecond = 2257.1 -MPI Rank 2: 05/03/2016 14:23:03: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845652 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.2593s; samplesPerSecond = 2468.3 -MPI Rank 2: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458017 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.2577s; samplesPerSecond = 2483.1 -MPI Rank 2: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633300 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.2594s; samplesPerSecond = 2467.5 -MPI Rank 2: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607240 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.2661s; samplesPerSecond = 2405.1 -MPI Rank 2: 05/03/2016 14:23:04: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74094816 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.2744s; samplesPerSecond = 2332.2 -MPI Rank 2: 05/03/2016 14:23:05: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087650 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.2714s; samplesPerSecond = 2358.0 -MPI Rank 2: 05/03/2016 14:23:05: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67608835 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.2799s; samplesPerSecond = 2286.4 -MPI Rank 2: 05/03/2016 14:23:05: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732625 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.2889s; samplesPerSecond = 2215.5 -MPI Rank 2: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925401 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.2870s; samplesPerSecond = 2229.9 -MPI Rank 2: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388257 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.2962s; samplesPerSecond = 2160.6 -MPI Rank 2: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544367 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.3083s; samplesPerSecond = 2075.8 -MPI Rank 2: 05/03/2016 14:23:06: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43264910 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3078s; samplesPerSecond = 2079.1 -MPI Rank 2: 05/03/2016 14:23:07: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728478 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3129s; samplesPerSecond = 2045.3 -MPI Rank 2: 05/03/2016 14:23:07: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674570 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.3167s; samplesPerSecond = 2020.8 -MPI Rank 2: 05/03/2016 14:23:07: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020700 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.2899s; samplesPerSecond = 2208.0 -MPI Rank 2: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400392 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.2866s; samplesPerSecond = 2232.8 -MPI Rank 2: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15884952 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.2820s; samplesPerSecond = 2269.7 -MPI Rank 2: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712615 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.2841s; samplesPerSecond = 2252.8 -MPI Rank 2: 05/03/2016 14:23:08: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604560 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.2501s; samplesPerSecond = 2559.5 -MPI Rank 2: 05/03/2016 14:23:08: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704645 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=9.33841s +MPI Rank 2: 05/03/2016 14:21:59: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 14:22:00: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944914 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.9013s; samplesPerSecond = 710.1 +MPI Rank 2: 05/03/2016 14:22:01: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299974 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.9866s; samplesPerSecond = 648.7 +MPI Rank 2: 05/03/2016 14:22:02: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971317 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 1.0695s; samplesPerSecond = 598.4 +MPI Rank 2: 05/03/2016 14:22:03: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341645 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 1.1540s; samplesPerSecond = 554.6 +MPI Rank 2: 05/03/2016 14:22:04: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074398 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.9879s; samplesPerSecond = 647.8 +MPI Rank 2: 05/03/2016 14:22:05: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252058 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 1.1188s; samplesPerSecond = 572.1 +MPI Rank 2: 05/03/2016 14:22:06: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563305 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 1.1285s; samplesPerSecond = 567.1 +MPI Rank 2: 05/03/2016 14:22:07: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49348925 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.9260s; samplesPerSecond = 691.1 +MPI Rank 2: 05/03/2016 14:22:08: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34739941 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 1.1329s; samplesPerSecond = 564.9 +MPI Rank 2: 05/03/2016 14:22:09: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960765 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 1.1181s; samplesPerSecond = 572.4 +MPI Rank 2: 05/03/2016 14:22:10: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24655864 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.9809s; samplesPerSecond = 652.5 +MPI Rank 2: 05/03/2016 14:22:11: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397441 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 1.0724s; samplesPerSecond = 596.8 +MPI Rank 2: 05/03/2016 14:22:12: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780763 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.9957s; samplesPerSecond = 642.7 +MPI Rank 2: 05/03/2016 14:22:13: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845652 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 1.0632s; samplesPerSecond = 601.9 +MPI Rank 2: 05/03/2016 14:22:14: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458017 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 1.1024s; samplesPerSecond = 580.5 +MPI Rank 2: 05/03/2016 14:22:15: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633300 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 1.0100s; samplesPerSecond = 633.7 +MPI Rank 2: 05/03/2016 14:22:17: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607240 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 1.1250s; samplesPerSecond = 568.9 +MPI Rank 2: 05/03/2016 14:22:18: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74094816 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 1.0033s; samplesPerSecond = 637.9 +MPI Rank 2: 05/03/2016 14:22:19: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087650 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 1.0628s; samplesPerSecond = 602.2 +MPI Rank 2: 05/03/2016 14:22:20: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67608835 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 1.2599s; samplesPerSecond = 508.0 +MPI Rank 2: 05/03/2016 14:22:21: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732625 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.8931s; samplesPerSecond = 716.6 +MPI Rank 2: 05/03/2016 14:22:22: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925401 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 1.0340s; samplesPerSecond = 619.0 +MPI Rank 2: 05/03/2016 14:22:23: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388257 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 1.1379s; samplesPerSecond = 562.4 +MPI Rank 2: 05/03/2016 14:22:24: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544367 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 1.0321s; samplesPerSecond = 620.1 +MPI Rank 2: 05/03/2016 14:22:25: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43264910 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 1.1227s; samplesPerSecond = 570.1 +MPI Rank 2: 05/03/2016 14:22:26: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728478 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 1.0463s; samplesPerSecond = 611.7 +MPI Rank 2: 05/03/2016 14:22:27: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674570 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 1.0363s; samplesPerSecond = 617.6 +MPI Rank 2: 05/03/2016 14:22:28: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020700 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 1.0699s; samplesPerSecond = 598.2 +MPI Rank 2: 05/03/2016 14:22:29: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400392 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 1.0704s; samplesPerSecond = 597.9 +MPI Rank 2: 05/03/2016 14:22:31: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15884952 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 1.1757s; samplesPerSecond = 544.4 +MPI Rank 2: 05/03/2016 14:22:32: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712615 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 1.0125s; samplesPerSecond = 632.1 +MPI Rank 2: 05/03/2016 14:22:32: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604560 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.9460s; samplesPerSecond = 676.6 +MPI Rank 2: 05/03/2016 14:22:33: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704645 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=33.8955s MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 14:22:33: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 
minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:09: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 14:23:09: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10257504 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.4034s; samplesPerSecond = 6346.5 -MPI Rank 2: 05/03/2016 14:23:09: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00548602 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.3954s; samplesPerSecond = 6473.9 -MPI Rank 2: 05/03/2016 14:23:10: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00766993 * 2560; EvalErrorPrediction = 0.54960937 * 2560; time = 0.3999s; samplesPerSecond = 6402.4 -MPI Rank 2: 05/03/2016 14:23:10: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.92049433 * 2560; EvalErrorPrediction = 0.53281250 * 2560; time = 0.4055s; samplesPerSecond = 6313.3 -MPI Rank 2: 05/03/2016 14:23:11: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90178368 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.4079s; samplesPerSecond = 6276.1 -MPI Rank 2: 05/03/2016 14:23:11: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91359460 * 2560; EvalErrorPrediction = 0.53984375 * 2560; time = 0.4165s; samplesPerSecond = 6146.1 -MPI Rank 2: 05/03/2016 14:23:11: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91765117 * 2560; EvalErrorPrediction = 0.53125000 * 2560; time = 0.4195s; samplesPerSecond = 6102.0 -MPI Rank 2: 05/03/2016 14:23:12: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.87682822 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.4214s; samplesPerSecond = 6075.1 -MPI Rank 2: 05/03/2016 14:23:12: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.95576037 * 20480; EvalErrorPrediction = 0.53979492 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=3.2999s +MPI Rank 2: 05/03/2016 14:22:33: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 14:22:34: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10257504 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 1.3139s; samplesPerSecond = 1948.4 +MPI Rank 2: 05/03/2016 14:22:35: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00548602 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 1.1693s; samplesPerSecond = 2189.3 +MPI Rank 2: 05/03/2016 14:22:36: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00766993 * 2560; EvalErrorPrediction = 0.54960937 * 2560; time = 1.2047s; samplesPerSecond = 2125.0 +MPI Rank 2: 05/03/2016 14:22:38: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.92049433 * 2560; EvalErrorPrediction = 0.53281250 * 2560; time = 1.2677s; samplesPerSecond = 2019.4 +MPI Rank 2: 05/03/2016 14:22:39: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90178368 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 1.0576s; samplesPerSecond = 2420.6 +MPI Rank 2: 05/03/2016 14:22:40: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91359460 * 2560; EvalErrorPrediction = 0.53984375 * 2560; time = 1.1673s; samplesPerSecond = 2193.1 +MPI Rank 2: 05/03/2016 14:22:41: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91765117 * 2560; EvalErrorPrediction = 0.53125000 * 2560; time = 1.2955s; samplesPerSecond = 1976.1 +MPI Rank 2: 05/03/2016 14:22:42: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.87682822 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 1.1883s; samplesPerSecond = 2154.4 +MPI Rank 2: 05/03/2016 14:22:42: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.95576037 * 20480; EvalErrorPrediction = 0.53979492 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=9.75348s MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:12: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:22:42: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:12: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 14:23:13: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593946 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 0.9812s; samplesPerSecond = 10436.6 -MPI Rank 2: 05/03/2016 14:23:14: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384561 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 0.9688s; samplesPerSecond = 10570.2 -MPI Rank 2: 05/03/2016 14:23:14: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989253 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.98261s -MPI Rank 2: 05/03/2016 14:23:14: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 14:22:42: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 14:22:44: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593946 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 1.7741s; samplesPerSecond = 5771.8 +MPI Rank 2: 05/03/2016 14:22:46: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384561 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 1.7750s; samplesPerSecond = 5769.1 +MPI Rank 2: 05/03/2016 14:22:46: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989253 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.64446s +MPI Rank 2: 05/03/2016 14:22:46: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:14: Action "train" complete. +MPI Rank 2: 05/03/2016 14:22:46: Action "train" complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:14: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 14:22:46: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.windows.gpu.txt index b643e88e0..be95dda5a 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/baseline.windows.gpu.txt @@ -1,4 +1,4 @@ -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr ------------------------------------------------------------------- Build 
info: @@ -58,18 +58,18 @@ ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded ping [requestnodes (after change)]: all 3 nodes responded ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 1 in a gearbox of 3 mpihelper: we are cog 2 in a gearbox of 3 +mpihelper: we are cog 1 in a gearbox of 3 mpihelper: we are cog 0 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: 3 nodes pinging each other @@ -77,32 +77,32 @@ ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded ping [mpihelper]: all 3 nodes responded ping [mpihelper]: all 3 nodes responded -MPI Rank 0: 05/03/2016 14:23:15: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank0 -MPI Rank 0: 05/03/2016 14:23:15: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 14:23:15: Build info: +MPI Rank 0: 05/03/2016 14:22:47: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank0 +MPI Rank 0: 05/03/2016 14:22:47: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:22:47: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:15: Built time: May 3 2016 13:23:06 -MPI Rank 0: 05/03/2016 14:23:15: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 0: 05/03/2016 14:23:15: Build type: Release -MPI Rank 0: 05/03/2016 14:23:15: Build target: GPU -MPI Rank 0: 05/03/2016 14:23:15: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 14:23:15: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 05/03/2016 14:23:15: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 0: 05/03/2016 14:23:15: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 05/03/2016 14:23:15: Build Branch: HEAD -MPI Rank 0: 05/03/2016 14:23:15: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 0: 05/03/2016 14:23:15: Built by svcphil on LIANA-09-w -MPI Rank 0: 05/03/2016 14:23:15: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 05/03/2016 14:23:15: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:22:47: Built time: May 3 2016 13:23:06 +MPI Rank 0: 05/03/2016 14:22:47: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 0: 05/03/2016 14:22:47: Build type: Release +MPI Rank 0: 05/03/2016 14:22:47: Build target: GPU +MPI Rank 0: 
05/03/2016 14:22:47: With 1bit-SGD: no +MPI Rank 0: 05/03/2016 14:22:47: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 0: 05/03/2016 14:22:47: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 0: 05/03/2016 14:22:47: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 0: 05/03/2016 14:22:47: Build Branch: HEAD +MPI Rank 0: 05/03/2016 14:22:47: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 0: 05/03/2016 14:22:47: Built by svcphil on LIANA-09-w +MPI Rank 0: 05/03/2016 14:22:47: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 0: 05/03/2016 14:22:47: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:15: Running on DPHAIM-25 at 2016/05/03 14:23:15 -MPI Rank 0: 05/03/2016 14:23:15: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 0: 05/03/2016 14:22:47: Running on DPHAIM-25 at 2016/05/03 14:22:47 +MPI Rank 0: 05/03/2016 14:22:47: Command line: +MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:15: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 14:23:15: precision = "float" +MPI Rank 0: 05/03/2016 14:22:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:22:47: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -192,27 +192,25 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 0: 
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:15: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:22:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:15: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 14:23:15: precision = "float" +MPI Rank 0: 05/03/2016 14:22:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:22:47: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = 0 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -296,33 +294,31 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: 
speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=8 -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:15: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:22:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:15: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:22:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain MPI Rank 0: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:deviceId=0 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=float -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -404,35 +400,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 0: ] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 14:23:15: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 14:23:15: 
Commands: speechTrain -MPI Rank 0: 05/03/2016 14:23:15: Precision = "float" -MPI Rank 0: 05/03/2016 14:23:15: Using 8 CPU threads. -MPI Rank 0: 05/03/2016 14:23:15: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 14:23:15: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 0: 05/03/2016 14:23:15: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 0: 05/03/2016 14:22:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:22:47: Commands: speechTrain +MPI Rank 0: 05/03/2016 14:22:47: Precision = "float" +MPI Rank 0: 05/03/2016 14:22:47: Using 8 CPU threads. +MPI Rank 0: 05/03/2016 14:22:47: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 14:22:47: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 0: 05/03/2016 14:22:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:15: ############################################################################## -MPI Rank 0: 05/03/2016 14:23:15: # # -MPI Rank 0: 05/03/2016 14:23:15: # Action "train" # -MPI Rank 0: 05/03/2016 14:23:15: # # -MPI Rank 0: 05/03/2016 14:23:15: ############################################################################## +MPI Rank 0: 05/03/2016 14:22:47: ############################################################################## +MPI Rank 0: 05/03/2016 14:22:47: # # +MPI Rank 0: 05/03/2016 14:22:47: # Action "train" # +MPI Rank 0: 05/03/2016 14:22:47: # # +MPI Rank 0: 05/03/2016 14:22:47: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:15: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 14:22:47: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:15: Creating virgin network. +MPI Rank 0: 05/03/2016 14:22:47: Creating virgin network. MPI Rank 0: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 MPI Rank 0: MPI Rank 0: Post-processing network... 
@@ -485,14 +480,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:16: Created model with 25 nodes on GPU 0. +MPI Rank 0: 05/03/2016 14:22:48: Created model with 25 nodes on GPU 0. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:16: Training criterion node(s): -MPI Rank 0: 05/03/2016 14:23:16: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 14:22:48: Training criterion node(s): +MPI Rank 0: 05/03/2016 14:22:48: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:16: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 14:22:48: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:16: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 14:22:48: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -500,135 +495,140 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0000006F23452D90: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 0000006F23453010: {[B0 Value[512 x 1]] } -MPI Rank 0: 0000006F23453470: {[W1 Value[512 x 512]] } -MPI Rank 0: 0000006F23453BF0: {[W0 Value[512 x 363]] } -MPI Rank 0: 0000006F23453D30: {[B1 Value[512 x 1]] } -MPI Rank 0: 0000006F23454550: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0000006F26061090: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0000006F26061130: {[B2 Value[132 x 1]] } -MPI Rank 0: 0000006F26061270: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0000006F26061450: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0000006F26061630: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0000006F26061770: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0000006F260618B0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 0000006F26061950: {[W2 Value[132 x 512]] } -MPI Rank 0: 0000006F260619F0: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 0000006F26061D10: {[labels Value[132 x *]] } -MPI Rank 0: 0000006F26061DB0: {[Prior Value[132]] } -MPI Rank 0: 0000006F26061E50: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0000006F26061F90: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0000006F26062170: {[W0*features Value[512 x *]] } -MPI Rank 0: 0000006F26062350: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0000006F260623F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0000006F26062490: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0000006F26062530: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0000006F260625D0: {[LogOfPrior Value[132]] } -MPI Rank 0: 0000006F260627B0: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0000006F260628F0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0000006F7E006010: {[features Value[363 x *]] } +MPI Rank 0: 0000003A23EF8440: 
{[features Value[363 x *]] } +MPI Rank 0: 0000003A443BB3C0: {[B0 Value[512 x 1]] } +MPI Rank 0: 0000003A443BBC80: {[W1 Value[512 x 512]] } +MPI Rank 0: 0000003A443BBD20: {[B1 Value[512 x 1]] } +MPI Rank 0: 0000003A443BC9A0: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 0000003A443BCA40: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 0000003A443BCEA0: {[W0 Value[512 x 363]] } +MPI Rank 0: 0000003A44C9DE30: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 0000003A44C9DED0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 0000003A44C9DF70: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 0000003A44C9E150: {[B2 Value[132 x 1]] } +MPI Rank 0: 0000003A44C9E330: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 0000003A44C9E3D0: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 0000003A44C9E470: {[LogOfPrior Value[132]] } +MPI Rank 0: 0000003A44C9E510: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 0000003A44C9E650: {[B2 Gradient[132 x 1]] } +MPI Rank 0: 0000003A44C9E830: {[Prior Value[132]] } +MPI Rank 0: 0000003A44C9E8D0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 0000003A44C9E970: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 0000003A44C9EAB0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 0000003A44C9EC90: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 0000003A44C9F230: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 0000003A44C9F2D0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 0000003A44C9F4B0: {[W0*features Value[512 x *]] } +MPI Rank 0: 0000003A44C9F550: {[W2 Value[132 x 512]] } +MPI Rank 0: 0000003A44C9F870: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 0000003A44C9F9B0: {[labels Value[132 x *]] } +MPI Rank 0: 0000003A44C9FA50: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:16: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 14:22:48: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:16: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 14:23:16: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 14:23:16: Prior = Mean() +MPI Rank 0: 05/03/2016 14:22:48: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 14:22:48: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 14:22:48: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:19: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 14:22:51: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:20: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 14:22:52: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:20: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.45645977 * 640; EvalErrorPrediction = 0.92500000 * 640; time = 0.1284s; samplesPerSecond = 4983.4 -MPI Rank 0: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22315777 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0729s; samplesPerSecond = 8777.9 -MPI Rank 0: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.95180674 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0680s; samplesPerSecond = 9412.3 -MPI Rank 0: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.94158077 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0714s; samplesPerSecond = 8969.0 -MPI Rank 0: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.85668764 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0681s; samplesPerSecond = 9394.9 -MPI Rank 0: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72866395 * 640; EvalErrorPrediction = 0.89531250 * 640; time = 0.0693s; samplesPerSecond = 9237.9 -MPI Rank 0: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51808950 * 640; EvalErrorPrediction = 0.82968750 * 640; time = 0.0681s; samplesPerSecond = 9395.5 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.48455148 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0692s; samplesPerSecond = 9242.7 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33829287 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0681s; samplesPerSecond = 9401.9 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.50167488 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0684s; samplesPerSecond = 9360.1 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.22861769 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0691s; samplesPerSecond = 9259.9 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.32617094 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.0683s; samplesPerSecond = 9366.6 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.16898034 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.0691s; samplesPerSecond = 9265.2 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.08892096 * 640; EvalErrorPrediction = 0.77656250 * 640; time = 0.0658s; samplesPerSecond = 9724.2 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06004823 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0680s; samplesPerSecond = 9406.8 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91128317 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0683s; samplesPerSecond = 9365.4 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90171900 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0683s; samplesPerSecond = 9370.7 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.73262443 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0689s; samplesPerSecond = 9294.0 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.66515411 * 640; EvalErrorPrediction = 0.68437500 * 640; time = 0.0700s; samplesPerSecond = 9146.0 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67382540 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0697s; samplesPerSecond = 9180.4 -MPI Rank 0: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.52869777 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0697s; samplesPerSecond = 9176.4 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60032086 * 640; EvalErrorPrediction = 0.66718750 * 640; time = 0.0671s; samplesPerSecond = 9536.4 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51134184 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0684s; samplesPerSecond = 9354.0 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.45362248 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0681s; samplesPerSecond = 9396.7 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.41640727 * 640; EvalErrorPrediction = 0.61562500 * 640; time = 0.0668s; samplesPerSecond = 9586.7 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39745480 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0660s; samplesPerSecond = 9698.9 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.16416048 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.0680s; samplesPerSecond = 9416.8 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.30346871 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0678s; samplesPerSecond = 9441.2 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.24398830 * 640; EvalErrorPrediction = 0.60937500 * 640; time = 0.0688s; samplesPerSecond = 9297.7 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15322484 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.0674s; samplesPerSecond = 9499.2 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.21664633 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.0676s; samplesPerSecond = 9462.4 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25246690 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.0689s; samplesPerSecond = 9282.5 -MPI Rank 0: 05/03/2016 14:23:22: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00000344 * 20480; EvalErrorPrediction = 0.72836914 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.32719s -MPI Rank 0: 05/03/2016 14:23:22: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 14:22:52: Starting minibatch loop, DataParallelSGD training (MyRank 
= 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.45645977 * 640; EvalErrorPrediction = 0.92500000 * 640; time = 0.0819s; samplesPerSecond = 7811.6 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22315777 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0740s; samplesPerSecond = 8646.7 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.95180674 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0671s; samplesPerSecond = 9536.3 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.94158077 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0709s; samplesPerSecond = 9024.3 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.85668764 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0673s; samplesPerSecond = 9515.6 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72866395 * 640; EvalErrorPrediction = 0.89531250 * 640; time = 0.0673s; samplesPerSecond = 9511.6 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51808950 * 640; EvalErrorPrediction = 0.82968750 * 640; time = 0.0662s; samplesPerSecond = 9664.0 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.48455148 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0677s; samplesPerSecond = 9452.2 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33829287 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0673s; samplesPerSecond = 9511.5 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.50167488 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0664s; samplesPerSecond = 9633.5 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.22861769 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0674s; samplesPerSecond = 9498.4 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.32617094 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.0671s; samplesPerSecond = 9541.1 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.16898034 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.0667s; samplesPerSecond = 9591.8 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.08892096 * 640; EvalErrorPrediction = 0.77656250 * 640; time = 0.0673s; samplesPerSecond = 9503.2 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06004823 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0666s; samplesPerSecond = 9603.8 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91128317 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0670s; samplesPerSecond = 9557.4 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90171900 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0669s; samplesPerSecond = 9568.2 +MPI Rank 0: 
05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.73262443 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0663s; samplesPerSecond = 9656.6 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.66515411 * 640; EvalErrorPrediction = 0.68437500 * 640; time = 0.0687s; samplesPerSecond = 9319.5 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67382540 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0666s; samplesPerSecond = 9614.5 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.52869777 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0681s; samplesPerSecond = 9392.4 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60032086 * 640; EvalErrorPrediction = 0.66718750 * 640; time = 0.0667s; samplesPerSecond = 9588.6 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51134184 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0685s; samplesPerSecond = 9344.4 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.45362248 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0657s; samplesPerSecond = 9739.6 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.41640727 * 640; EvalErrorPrediction = 0.61562500 * 640; time = 0.0666s; samplesPerSecond = 9612.6 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39745480 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0664s; samplesPerSecond = 9635.4 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.16416048 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.0663s; samplesPerSecond = 9647.7 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.30346871 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0666s; samplesPerSecond = 9615.5 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.24398830 * 640; EvalErrorPrediction = 0.60937500 * 640; time = 0.0678s; samplesPerSecond = 9444.8 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15322484 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.0676s; samplesPerSecond = 9469.6 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.21664633 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.0662s; samplesPerSecond = 9673.4 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25246690 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.0669s; samplesPerSecond = 9568.4 +MPI Rank 0: 05/03/2016 14:22:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00000344 * 20480; EvalErrorPrediction = 0.72836914 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.1907s +MPI Rank 0: 05/03/2016 14:22:54: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:22: 
Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 14:22:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:22: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08151949 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.0921s; samplesPerSecond = 27807.6 -MPI Rank 0: 05/03/2016 14:23:22: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98395686 * 2560; EvalErrorPrediction = 0.54257813 * 2560; time = 0.0755s; samplesPerSecond = 33890.2 -MPI Rank 0: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98575480 * 2560; EvalErrorPrediction = 0.54492188 * 2560; time = 0.0730s; samplesPerSecond = 35049.8 -MPI Rank 0: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.90485033 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0741s; samplesPerSecond = 34527.4 -MPI Rank 0: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.88324139 * 2560; EvalErrorPrediction = 0.52539063 * 2560; time = 0.0728s; samplesPerSecond = 35159.0 -MPI Rank 0: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.89109324 * 2560; EvalErrorPrediction = 0.53359375 * 2560; time = 0.0772s; samplesPerSecond = 33141.3 -MPI Rank 0: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89496253 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.0736s; samplesPerSecond = 34765.1 -MPI Rank 0: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.85944298 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.0733s; samplesPerSecond = 34926.4 -MPI Rank 0: 05/03/2016 14:23:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.93560270 * 20480; EvalErrorPrediction = 0.53603516 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.622448s -MPI Rank 0: 05/03/2016 14:23:23: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn.2' +MPI Rank 0: 05/03/2016 14:22:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08151949 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.0929s; samplesPerSecond = 27569.6 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98395686 * 2560; EvalErrorPrediction = 0.54257813 * 2560; time = 0.0751s; samplesPerSecond = 34107.0 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98575480 * 2560; EvalErrorPrediction = 0.54492188 * 2560; time = 0.0732s; samplesPerSecond = 34958.8 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.90485033 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0744s; samplesPerSecond = 34428.0 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.88324139 * 2560; EvalErrorPrediction = 0.52539063 * 2560; time = 0.0725s; samplesPerSecond = 35333.3 +MPI Rank 0: 05/03/2016 14:22:55: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.89109324 * 2560; EvalErrorPrediction = 0.53359375 * 2560; time = 0.0756s; samplesPerSecond = 33874.5 +MPI Rank 0: 05/03/2016 14:22:55: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89496253 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.0733s; samplesPerSecond = 34944.5 +MPI Rank 0: 05/03/2016 14:22:55: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.85944298 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.0731s; samplesPerSecond = 35010.5 +MPI Rank 0: 05/03/2016 14:22:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.93560270 * 20480; EvalErrorPrediction = 0.53603516 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.620004s +MPI Rank 0: 05/03/2016 14:22:55: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:22:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:23: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 0: 05/03/2016 14:23:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752854 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.1300s; samplesPerSecond = 78779.5 -MPI Rank 0: 05/03/2016 14:23:23: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358833 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1038s; samplesPerSecond = 98633.2 -MPI Rank 0: 05/03/2016 14:23:23: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055844 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.243664s -MPI Rank 0: 05/03/2016 14:23:23: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 14:23:23: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 14:22:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 14:22:55: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752854 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.1332s; samplesPerSecond = 76898.8 +MPI Rank 0: 05/03/2016 14:22:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358833 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1069s; samplesPerSecond = 95828.1 +MPI Rank 0: 05/03/2016 14:22:55: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055844 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.248434s +MPI Rank 0: 05/03/2016 14:22:55: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 14:22:55: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:23: Action "train" complete. +MPI Rank 0: 05/03/2016 14:22:55: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 14:23:23: __COMPLETED__ -MPI Rank 1: 05/03/2016 14:23:15: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank1 -MPI Rank 1: 05/03/2016 14:23:15: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 14:23:15: Build info: +MPI Rank 0: 05/03/2016 14:22:55: __COMPLETED__ +MPI Rank 1: 05/03/2016 14:22:47: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank1 +MPI Rank 1: 05/03/2016 14:22:47: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:22:47: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:15: Built time: May 3 2016 13:23:06 -MPI Rank 1: 05/03/2016 14:23:15: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 1: 05/03/2016 14:23:15: Build type: Release -MPI Rank 1: 05/03/2016 14:23:15: Build target: GPU -MPI Rank 1: 05/03/2016 14:23:15: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 14:23:15: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 05/03/2016 14:23:15: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 1: 05/03/2016 14:23:15: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 05/03/2016 14:23:15: Build Branch: HEAD -MPI Rank 1: 05/03/2016 14:23:15: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 1: 05/03/2016 14:23:15: Built by svcphil on LIANA-09-w -MPI Rank 1: 05/03/2016 14:23:15: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 05/03/2016 14:23:15: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:22:47: Built time: May 3 2016 13:23:06 +MPI Rank 1: 05/03/2016 14:22:47: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 1: 05/03/2016 14:22:47: Build type: Release +MPI Rank 1: 05/03/2016 14:22:47: Build target: GPU +MPI Rank 1: 05/03/2016 14:22:47: With 1bit-SGD: no +MPI Rank 1: 05/03/2016 14:22:47: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 1: 05/03/2016 14:22:47: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 1: 05/03/2016 14:22:47: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 1: 05/03/2016 14:22:47: Build Branch: HEAD +MPI Rank 1: 05/03/2016 14:22:47: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 1: 05/03/2016 14:22:47: Built by svcphil on LIANA-09-w +MPI Rank 1: 05/03/2016 14:22:47: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 1: 05/03/2016 14:22:47: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:15: Running on DPHAIM-25 at 2016/05/03 14:23:15 -MPI Rank 1: 05/03/2016 14:23:15: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 1: 05/03/2016 14:22:47: Running on DPHAIM-25 at 2016/05/03 14:22:47 +MPI Rank 1: 05/03/2016 14:22:47: Command line: +MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:15: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 14:23:15: precision = "float" +MPI Rank 1: 05/03/2016 14:22:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:22:47: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -718,27 +718,25 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:15: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:22:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) 
<<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:15: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 14:23:15: precision = "float" +MPI Rank 1: 05/03/2016 14:22:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:22:47: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = 0 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -822,33 +820,31 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=8 -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:15: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:22:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:15: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:22:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:deviceId=0 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 1: configparameters: 
cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=float -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -930,35 +926,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 1: ] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 14:23:15: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 14:23:15: Commands: speechTrain -MPI Rank 1: 05/03/2016 14:23:15: Precision = "float" -MPI Rank 1: 05/03/2016 14:23:15: Using 8 CPU threads. -MPI Rank 1: 05/03/2016 14:23:15: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 14:23:15: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 1: 05/03/2016 14:23:15: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 1: 05/03/2016 14:22:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:22:47: Commands: speechTrain +MPI Rank 1: 05/03/2016 14:22:47: Precision = "float" +MPI Rank 1: 05/03/2016 14:22:47: Using 8 CPU threads. 
+MPI Rank 1: 05/03/2016 14:22:47: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 14:22:47: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 1: 05/03/2016 14:22:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:15: ############################################################################## -MPI Rank 1: 05/03/2016 14:23:15: # # -MPI Rank 1: 05/03/2016 14:23:15: # Action "train" # -MPI Rank 1: 05/03/2016 14:23:15: # # -MPI Rank 1: 05/03/2016 14:23:15: ############################################################################## +MPI Rank 1: 05/03/2016 14:22:47: ############################################################################## +MPI Rank 1: 05/03/2016 14:22:47: # # +MPI Rank 1: 05/03/2016 14:22:47: # Action "train" # +MPI Rank 1: 05/03/2016 14:22:47: # # +MPI Rank 1: 05/03/2016 14:22:47: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:15: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 14:22:47: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:16: Creating virgin network. +MPI Rank 1: 05/03/2016 14:22:48: Creating virgin network. MPI Rank 1: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 MPI Rank 1: MPI Rank 1: Post-processing network... @@ -1011,14 +1006,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:16: Created model with 25 nodes on GPU 0. +MPI Rank 1: 05/03/2016 14:22:49: Created model with 25 nodes on GPU 0. 
MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:16: Training criterion node(s): -MPI Rank 1: 05/03/2016 14:23:16: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 14:22:49: Training criterion node(s): +MPI Rank 1: 05/03/2016 14:22:49: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:16: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 14:22:49: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:16: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 14:22:49: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1026,132 +1021,137 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 00000098876AB6F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 00000098876AB970: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 00000098876ABAB0: {[W2 Value[132 x 512]] } -MPI Rank 1: 00000098876ABC90: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 00000098876ABD30: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 00000098876ABE70: {[LogOfPrior Value[132]] } -MPI Rank 1: 00000098876ABF10: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 00000098876ABFB0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 00000098876AC050: {[labels Value[132 x *]] } -MPI Rank 1: 00000098876AC0F0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 00000098876AC190: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 00000098876AC4B0: {[B2 Value[132 x 1]] } -MPI Rank 1: 00000098876AC730: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 00000098876ACB90: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 00000098876ACC30: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 00000098876ACD70: {[Prior Value[132]] } -MPI Rank 1: 00000098876ACE10: {[W0*features Value[512 x *]] } -MPI Rank 1: 00000098876ACEB0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 00000098876ACFF0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 00000098876AD130: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 00000098876AD3B0: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 00000098E4626650: {[features Value[363 x *]] } -MPI Rank 1: 00000098FDE4C1F0: {[W0 Value[512 x 363]] } -MPI Rank 1: 00000098FDE4C290: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 00000098FDE4CB50: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 00000098FDE4CF10: {[B0 Value[512 x 1]] } -MPI Rank 1: 00000098FDE4D230: {[W1 Value[512 x 512]] } -MPI Rank 1: 00000098FDE4D370: {[B1 Value[512 x 1]] } +MPI Rank 1: 000000D220C5FCA0: {[B0 Value[512 x 1]] } +MPI Rank 1: 000000D220C60240: {[W1 Value[512 x 512]] } +MPI Rank 1: 000000D220C60420: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 000000D220C609C0: {[W0 Value[512 x 363]] } +MPI Rank 1: 000000D220C60D80: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 000000D220C61320: {[B1 Value[512 x 1]] } 
+MPI Rank 1: 000000D22369D6B0: {[B2 Value[132 x 1]] } +MPI Rank 1: 000000D22369D7F0: {[LogOfPrior Value[132]] } +MPI Rank 1: 000000D22369D890: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 000000D22369DBB0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 000000D22369DC50: {[W0*features Value[512 x *]] } +MPI Rank 1: 000000D22369DCF0: {[Prior Value[132]] } +MPI Rank 1: 000000D22369DED0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 000000D22369E010: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 000000D22369E0B0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 000000D22369E290: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 000000D22369E330: {[labels Value[132 x *]] } +MPI Rank 1: 000000D22369E470: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 000000D22369E650: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 000000D22369E830: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 000000D22369E8D0: {[W2 Value[132 x 512]] } +MPI Rank 1: 000000D22369EB50: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 000000D22369EBF0: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 000000D22369EDD0: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 000000D22369F050: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 000000D22369F0F0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 000000D22369F4B0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 000000D27C2288A0: {[features Value[363 x *]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:16: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 14:22:49: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:16: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 14:23:16: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 14:23:16: Prior = Mean() +MPI Rank 1: 05/03/2016 14:22:49: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 14:22:49: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 14:22:49: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:20: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 14:22:52: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:20: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 14:22:52: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:20: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 1: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.45645977 * 640; EvalErrorPrediction = 0.92500000 * 640; time = 0.1351s; samplesPerSecond = 4735.5 -MPI Rank 1: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22315777 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0732s; samplesPerSecond = 8737.8 -MPI Rank 1: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.95180674 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0676s; samplesPerSecond = 9469.6 -MPI Rank 1: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.94158077 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0714s; samplesPerSecond = 8968.5 -MPI Rank 1: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.85668764 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0684s; samplesPerSecond = 9354.4 -MPI Rank 1: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72866395 * 640; EvalErrorPrediction = 0.89531250 * 640; time = 0.0692s; samplesPerSecond = 9242.9 -MPI Rank 1: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51808950 * 640; EvalErrorPrediction = 0.82968750 * 640; time = 0.0677s; samplesPerSecond = 9457.5 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.48455148 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0696s; samplesPerSecond = 9193.8 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33829287 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0681s; samplesPerSecond = 9399.0 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.50167488 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0684s; samplesPerSecond = 9356.6 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.22861769 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0690s; samplesPerSecond = 9272.7 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.32617094 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.0683s; samplesPerSecond = 9365.9 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.16898034 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.0691s; samplesPerSecond = 9263.4 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.08892096 * 640; EvalErrorPrediction = 0.77656250 * 640; time = 0.0654s; samplesPerSecond = 9787.4 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06004823 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0679s; samplesPerSecond = 9420.6 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91128317 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0687s; samplesPerSecond = 9313.8 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90171900 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0679s; samplesPerSecond = 9430.6 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.73262443 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0692s; samplesPerSecond = 9246.8 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.66515411 * 640; EvalErrorPrediction = 0.68437500 * 640; time = 0.0700s; samplesPerSecond = 9148.9 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67382540 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0693s; samplesPerSecond = 9236.1 -MPI Rank 1: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.52869777 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0701s; samplesPerSecond = 9131.0 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60032086 * 640; EvalErrorPrediction = 0.66718750 * 640; time = 0.0671s; samplesPerSecond = 9536.7 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51134184 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0680s; samplesPerSecond = 9416.8 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.45362248 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0681s; samplesPerSecond = 9401.4 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.41640727 * 640; EvalErrorPrediction = 0.61562500 * 640; time = 0.0668s; samplesPerSecond = 9586.7 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39745480 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0664s; samplesPerSecond = 9642.3 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.16416048 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.0676s; samplesPerSecond = 9473.8 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.30346871 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0681s; samplesPerSecond = 9393.4 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.24398830 * 640; EvalErrorPrediction = 0.60937500 * 640; time = 0.0687s; samplesPerSecond = 9310.6 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15322484 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.0673s; samplesPerSecond = 9508.0 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.21664633 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.0672s; samplesPerSecond = 9518.0 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25246690 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.0689s; samplesPerSecond = 9288.3 -MPI Rank 1: 05/03/2016 14:23:22: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00000344 * 20480; EvalErrorPrediction = 0.72836914 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.32705s +MPI Rank 1: 05/03/2016 14:22:52: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.45645977 * 640; EvalErrorPrediction = 0.92500000 * 640; time = 0.0820s; samplesPerSecond = 7804.2 +MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22315777 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0741s; samplesPerSecond = 8641.5 +MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.95180674 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0667s; samplesPerSecond = 9594.8 +MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.94158077 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0709s; samplesPerSecond = 9023.7 +MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.85668764 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0677s; samplesPerSecond = 9456.8 +MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72866395 * 640; EvalErrorPrediction = 0.89531250 * 640; time = 0.0673s; samplesPerSecond = 9509.1 +MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51808950 * 640; EvalErrorPrediction = 0.82968750 * 640; time = 0.0663s; samplesPerSecond = 9656.6 +MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.48455148 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0673s; samplesPerSecond = 9510.5 +MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33829287 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0672s; samplesPerSecond = 9517.2 +MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.50167488 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0664s; samplesPerSecond = 9635.9 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.22861769 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0674s; samplesPerSecond = 9501.3 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.32617094 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.0671s; samplesPerSecond = 9542.7 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.16898034 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.0667s; samplesPerSecond = 9594.6 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.08892096 * 640; EvalErrorPrediction = 0.77656250 * 640; time = 0.0673s; samplesPerSecond = 9505.8 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06004823 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0666s; samplesPerSecond = 9605.3 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91128317 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0669s; samplesPerSecond = 9560.4 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90171900 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0668s; samplesPerSecond = 9576.7 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.73262443 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0666s; samplesPerSecond = 9607.9 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.66515411 * 640; EvalErrorPrediction = 0.68437500 * 640; time = 0.0683s; samplesPerSecond = 9376.5 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67382540 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0665s; samplesPerSecond = 9617.4 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.52869777 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0681s; samplesPerSecond = 9393.9 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60032086 * 640; EvalErrorPrediction = 0.66718750 * 640; time = 0.0667s; samplesPerSecond = 9596.2 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51134184 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0685s; samplesPerSecond = 9347.0 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.45362248 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0657s; samplesPerSecond = 9742.6 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.41640727 * 640; EvalErrorPrediction = 0.61562500 * 640; time = 0.0670s; samplesPerSecond = 9555.4 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39745480 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0660s; samplesPerSecond = 9695.4 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.16416048 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.0667s; samplesPerSecond = 9591.9 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.30346871 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0666s; samplesPerSecond = 9614.8 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.24398830 * 640; EvalErrorPrediction = 0.60937500 * 640; time = 0.0678s; samplesPerSecond = 9438.7 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15322484 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.0672s; samplesPerSecond = 9527.1 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.21664633 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.0661s; samplesPerSecond = 9679.7 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25246690 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.0668s; samplesPerSecond = 9574.7 +MPI Rank 1: 05/03/2016 14:22:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00000344 * 20480; EvalErrorPrediction = 0.72836914 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.19089s MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:22: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 14:22:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:22: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08151949 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.0924s; samplesPerSecond = 27697.8 -MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98395686 * 2560; EvalErrorPrediction = 0.54257813 * 2560; time = 0.0755s; samplesPerSecond = 33886.6 -MPI Rank 1: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98575480 * 2560; EvalErrorPrediction = 0.54492188 * 2560; time = 0.0730s; samplesPerSecond = 35054.1 -MPI Rank 1: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.90485033 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0741s; samplesPerSecond = 34525.5 -MPI Rank 1: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.88324139 * 2560; EvalErrorPrediction = 0.52539063 * 2560; time = 0.0728s; samplesPerSecond = 35158.6 -MPI Rank 1: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.89109324 * 2560; EvalErrorPrediction = 0.53359375 * 2560; time = 0.0768s; samplesPerSecond = 33322.9 -MPI Rank 1: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89496253 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.0736s; samplesPerSecond = 34781.2 -MPI Rank 1: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.85944298 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.0737s; samplesPerSecond = 34733.1 -MPI Rank 1: 05/03/2016 14:23:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.93560270 * 20480; EvalErrorPrediction = 0.53603516 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.622655s +MPI Rank 1: 05/03/2016 14:22:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08151949 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.0930s; samplesPerSecond = 27529.0 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98395686 * 2560; EvalErrorPrediction = 0.54257813 * 2560; time = 0.0751s; samplesPerSecond = 34085.2 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98575480 * 2560; EvalErrorPrediction = 0.54492188 * 2560; time = 0.0733s; samplesPerSecond = 34944.5 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.90485033 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0744s; samplesPerSecond = 34407.2 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.88324139 * 2560; EvalErrorPrediction = 0.52539063 * 2560; time = 0.0725s; samplesPerSecond = 35313.8 +MPI Rank 1: 05/03/2016 14:22:55: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.89109324 * 2560; EvalErrorPrediction = 0.53359375 * 2560; time = 0.0756s; samplesPerSecond = 33853.5 +MPI Rank 1: 05/03/2016 14:22:55: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89496253 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.0733s; samplesPerSecond = 34916.9 +MPI Rank 1: 05/03/2016 14:22:55: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.85944298 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.0727s; samplesPerSecond = 35208.8 +MPI Rank 1: 05/03/2016 14:22:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.93560270 * 20480; EvalErrorPrediction = 0.53603516 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.619777s MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 14:22:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:23: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 05/03/2016 14:23:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752854 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.1303s; samplesPerSecond = 78612.6 -MPI Rank 1: 05/03/2016 14:23:23: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358833 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1038s; samplesPerSecond = 98653.2 -MPI Rank 1: 05/03/2016 14:23:23: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055844 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.243469s -MPI Rank 1: 05/03/2016 14:23:23: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 14:22:55: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 1: 05/03/2016 14:22:55: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752854 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.1334s; samplesPerSecond = 76763.9 +MPI Rank 1: 05/03/2016 14:22:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358833 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1069s; samplesPerSecond = 95831.7 +MPI Rank 1: 05/03/2016 14:22:55: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055844 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.248606s +MPI Rank 1: 05/03/2016 14:22:55: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:23: Action "train" complete. +MPI Rank 1: 05/03/2016 14:22:55: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:23:23: __COMPLETED__ -MPI Rank 2: 05/03/2016 14:23:16: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank2 -MPI Rank 2: 05/03/2016 14:23:16: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 14:23:16: Build info: +MPI Rank 1: 05/03/2016 14:22:55: __COMPLETED__ +MPI Rank 2: 05/03/2016 14:22:48: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank2 +MPI Rank 2: 05/03/2016 14:22:48: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:22:48: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:16: Built time: May 3 2016 13:23:06 -MPI Rank 2: 05/03/2016 14:23:16: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 2: 05/03/2016 14:23:16: Build type: Release -MPI Rank 2: 05/03/2016 14:23:16: Build target: GPU -MPI Rank 2: 05/03/2016 14:23:16: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 14:23:16: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 05/03/2016 14:23:16: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 2: 05/03/2016 14:23:16: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 05/03/2016 14:23:16: Build Branch: HEAD -MPI Rank 2: 05/03/2016 14:23:16: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 2: 05/03/2016 14:23:16: Built by svcphil on LIANA-09-w -MPI Rank 2: 05/03/2016 14:23:16: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 05/03/2016 14:23:16: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:22:48: Built time: May 3 2016 13:23:06 +MPI Rank 2: 05/03/2016 14:22:48: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 2: 05/03/2016 14:22:48: Build type: Release +MPI Rank 2: 05/03/2016 14:22:48: Build target: GPU +MPI Rank 2: 05/03/2016 14:22:48: With 1bit-SGD: no +MPI Rank 2: 05/03/2016 14:22:48: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 2: 05/03/2016 14:22:48: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 2: 05/03/2016 14:22:48: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 2: 05/03/2016 14:22:48: Build Branch: HEAD +MPI Rank 2: 05/03/2016 14:22:48: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 2: 05/03/2016 14:22:48: Built by svcphil on LIANA-09-w +MPI Rank 2: 05/03/2016 14:22:48: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 2: 05/03/2016 14:22:48: 
------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:16: Running on DPHAIM-25 at 2016/05/03 14:23:16 -MPI Rank 2: 05/03/2016 14:23:16: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 2: 05/03/2016 14:22:48: Running on DPHAIM-25 at 2016/05/03 14:22:48 +MPI Rank 2: 05/03/2016 14:22:48: Command line: +MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W2\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:16: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 14:23:16: precision = "float" +MPI Rank 2: 05/03/2016 14:22:48: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:22:48: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1241,27 +1241,25 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:16: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:22:48: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:16: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 14:23:16: precision = "float" +MPI Rank 2: 05/03/2016 14:22:48: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:22:48: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = 0 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1345,33 +1343,31 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=8 -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:16: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:22:48: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 
14:23:16: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:22:48: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\DNN MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:deviceId=0 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=float -MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1453,35 +1449,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] +MPI Rank 2: ] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 14:23:16: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 14:23:16: Commands: speechTrain -MPI Rank 2: 05/03/2016 14:23:16: Precision = "float" -MPI Rank 2: 05/03/2016 14:23:16: Using 8 CPU threads. 
-MPI Rank 2: 05/03/2016 14:23:16: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 14:23:16: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 2: 05/03/2016 14:23:16: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +MPI Rank 2: 05/03/2016 14:22:48: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:22:48: Commands: speechTrain +MPI Rank 2: 05/03/2016 14:22:48: Precision = "float" +MPI Rank 2: 05/03/2016 14:22:48: Using 8 CPU threads. +MPI Rank 2: 05/03/2016 14:22:48: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141958.750677\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 14:22:48: CNTKCommandTrainInfo: speechTrain : 3 +MPI Rank 2: 05/03/2016 14:22:48: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:16: ############################################################################## -MPI Rank 2: 05/03/2016 14:23:16: # # -MPI Rank 2: 05/03/2016 14:23:16: # Action "train" # -MPI Rank 2: 05/03/2016 14:23:16: # # -MPI Rank 2: 05/03/2016 14:23:16: ############################################################################## +MPI Rank 2: 05/03/2016 14:22:48: ############################################################################## +MPI Rank 2: 05/03/2016 14:22:48: # # +MPI Rank 2: 05/03/2016 14:22:48: # Action "train" # +MPI Rank 2: 05/03/2016 14:22:48: # # +MPI Rank 2: 05/03/2016 14:22:48: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:16: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 14:22:48: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W2\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:16: Creating virgin network. +MPI Rank 2: 05/03/2016 14:22:48: Creating virgin network. MPI Rank 2: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 MPI Rank 2: MPI Rank 2: Post-processing network... @@ -1534,14 +1529,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. 
MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:17: Created model with 25 nodes on GPU 0. +MPI Rank 2: 05/03/2016 14:22:49: Created model with 25 nodes on GPU 0. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:17: Training criterion node(s): -MPI Rank 2: 05/03/2016 14:23:17: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 14:22:49: Training criterion node(s): +MPI Rank 2: 05/03/2016 14:22:49: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:17: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 14:22:49: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:17: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 14:22:49: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1549,103 +1544,108 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 000000CBB36A6470: {[features Value[363 x *]] } -MPI Rank 2: 000000CBD2DD69A0: {[B0 Value[512 x 1]] } -MPI Rank 2: 000000CBD2DD6C20: {[W0 Value[512 x 363]] } -MPI Rank 2: 000000CBD2DD6D60: {[W1 Value[512 x 512]] } -MPI Rank 2: 000000CBD2DD71C0: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 000000CBD2DD7760: {[B1 Value[512 x 1]] } -MPI Rank 2: 000000CBD2DD8520: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 000000CBD5AF8250: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 000000CBD5AF8430: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 000000CBD5AF84D0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 000000CBD5AF8570: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 000000CBD5AF8750: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 000000CBD5AF8890: {[Prior Value[132]] } -MPI Rank 2: 000000CBD5AF8930: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 000000CBD5AF8A70: {[W2 Value[132 x 512]] } -MPI Rank 2: 000000CBD5AF8C50: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 000000CBD5AF8E30: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 000000CBD5AF9010: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 000000CBD5AF90B0: {[labels Value[132 x *]] } -MPI Rank 2: 000000CBD5AF9330: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 000000CBD5AF93D0: {[LogOfPrior Value[132]] } -MPI Rank 2: 000000CBD5AF9470: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 000000CBD5AF9830: {[W0*features Value[512 x *]] } -MPI Rank 2: 000000CBD5AF98D0: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 000000CBD5AF9A10: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 000000CBD5AF9BF0: {[B2 Value[132 x 1]] } -MPI Rank 2: 000000CBD5AF9E70: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 000000CBD5AF9F10: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 000000FAD5E183A0: {[features Value[363 x *]] } +MPI Rank 2: 000000FAF43354F0: {[B0 Value[512 x 1]] } +MPI Rank 2: 000000FAF43359F0: {[W1 Value[512 x 512]] } +MPI Rank 2: 
000000FAF4335A90: {[B1 Value[512 x 1]] } +MPI Rank 2: 000000FAF4336D50: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 000000FAF4336DF0: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 000000FAF4336F30: {[W0 Value[512 x 363]] } +MPI Rank 2: 000000FAF69C53C0: {[Prior Value[132]] } +MPI Rank 2: 000000FAF69C5500: {[LogOfPrior Value[132]] } +MPI Rank 2: 000000FAF69C5640: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 000000FAF69C5780: {[labels Value[132 x *]] } +MPI Rank 2: 000000FAF69C5820: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 000000FAF69C58C0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 000000FAF69C5B40: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 000000FAF69C5BE0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 000000FAF69C6180: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 000000FAF69C62C0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 000000FAF69C6360: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 000000FAF69C6400: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 000000FAF69C6540: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 000000FAF69C65E0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 000000FAF69C6900: {[W0*features Value[512 x *]] } +MPI Rank 2: 000000FAF69C6A40: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 000000FAF69C6C20: {[W2 Value[132 x 512]] } +MPI Rank 2: 000000FAF69C6CC0: {[B2 Value[132 x 1]] } +MPI Rank 2: 000000FAF69C6E00: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 000000FAF69C6EA0: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 000000FAF69C7260: {[B2 Gradient[132 x 1]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:17: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 14:22:49: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:17: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 14:23:17: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 14:23:17: Prior = Mean() +MPI Rank 2: 05/03/2016 14:22:49: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 14:22:49: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 14:22:49: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:20: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 14:22:52: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:20: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 14:22:52: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:20: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
-MPI Rank 2: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.45645977 * 640; EvalErrorPrediction = 0.92500000 * 640; time = 0.1286s; samplesPerSecond = 4978.2 -MPI Rank 2: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22315777 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0731s; samplesPerSecond = 8755.1 -MPI Rank 2: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.95180674 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0679s; samplesPerSecond = 9429.4 -MPI Rank 2: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.94158077 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0713s; samplesPerSecond = 8980.2 -MPI Rank 2: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.85668764 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0684s; samplesPerSecond = 9359.2 -MPI Rank 2: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72866395 * 640; EvalErrorPrediction = 0.89531250 * 640; time = 0.0693s; samplesPerSecond = 9233.7 -MPI Rank 2: 05/03/2016 14:23:20: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51808950 * 640; EvalErrorPrediction = 0.82968750 * 640; time = 0.0679s; samplesPerSecond = 9425.8 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.48455148 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0695s; samplesPerSecond = 9209.8 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33829287 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0681s; samplesPerSecond = 9402.2 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.50167488 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0684s; samplesPerSecond = 9351.9 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.22861769 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0691s; samplesPerSecond = 9262.2 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.32617094 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.0683s; samplesPerSecond = 9365.9 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.16898034 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.0691s; samplesPerSecond = 9255.6 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.08892096 * 640; EvalErrorPrediction = 0.77656250 * 640; time = 0.0656s; samplesPerSecond = 9761.0 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06004823 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0681s; samplesPerSecond = 9392.4 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91128317 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0686s; samplesPerSecond = 9336.1 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90171900 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0681s; samplesPerSecond = 9398.4 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.73262443 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0690s; samplesPerSecond = 9273.1 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.66515411 * 640; EvalErrorPrediction = 0.68437500 * 640; time = 0.0700s; samplesPerSecond = 9141.4 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67382540 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0696s; samplesPerSecond = 9201.9 -MPI Rank 2: 05/03/2016 14:23:21: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.52869777 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0699s; samplesPerSecond = 9151.7 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60032086 * 640; EvalErrorPrediction = 0.66718750 * 640; time = 0.0672s; samplesPerSecond = 9527.1 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51134184 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0682s; samplesPerSecond = 9386.2 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.45362248 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0681s; samplesPerSecond = 9400.2 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.41640727 * 640; EvalErrorPrediction = 0.61562500 * 640; time = 0.0668s; samplesPerSecond = 9577.0 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39745480 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0662s; samplesPerSecond = 9671.8 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.16416048 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.0678s; samplesPerSecond = 9441.9 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.30346871 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0681s; samplesPerSecond = 9397.9 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.24398830 * 640; EvalErrorPrediction = 0.60937500 * 640; time = 0.0688s; samplesPerSecond = 9300.7 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15322484 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.0674s; samplesPerSecond = 9497.5 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.21664633 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.0675s; samplesPerSecond = 9486.8 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25246690 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.0690s; samplesPerSecond = 9281.7 -MPI Rank 2: 05/03/2016 14:23:22: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00000344 * 20480; EvalErrorPrediction = 0.72836914 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.32691s +MPI Rank 2: 05/03/2016 14:22:52: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.45645977 * 640; EvalErrorPrediction = 0.92500000 * 640; time = 0.0816s; samplesPerSecond = 7846.8 +MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22315777 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0741s; samplesPerSecond = 8642.3 +MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.95180674 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0669s; samplesPerSecond = 9560.8 +MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.94158077 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0710s; samplesPerSecond = 9014.5 +MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.85668764 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0675s; samplesPerSecond = 9479.4 +MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72866395 * 640; EvalErrorPrediction = 0.89531250 * 640; time = 0.0673s; samplesPerSecond = 9506.7 +MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51808950 * 640; EvalErrorPrediction = 0.82968750 * 640; time = 0.0663s; samplesPerSecond = 9654.5 +MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.48455148 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0676s; samplesPerSecond = 9472.6 +MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33829287 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0673s; samplesPerSecond = 9510.1 +MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.50167488 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0665s; samplesPerSecond = 9625.9 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.22861769 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0674s; samplesPerSecond = 9495.1 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.32617094 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.0671s; samplesPerSecond = 9535.7 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.16898034 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.0668s; samplesPerSecond = 9583.6 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.08892096 * 640; EvalErrorPrediction = 0.77656250 * 640; time = 0.0674s; samplesPerSecond = 9500.8 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06004823 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0667s; samplesPerSecond = 9595.5 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91128317 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0670s; samplesPerSecond = 9557.4 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90171900 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0669s; samplesPerSecond = 9566.7 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.73262443 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0665s; samplesPerSecond = 9620.4 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.66515411 * 640; EvalErrorPrediction = 0.68437500 * 640; time = 0.0685s; samplesPerSecond = 9343.3 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67382540 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0666s; samplesPerSecond = 9608.6 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.52869777 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0682s; samplesPerSecond = 9388.7 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60032086 * 640; EvalErrorPrediction = 0.66718750 * 640; time = 0.0668s; samplesPerSecond = 9587.3 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51134184 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0685s; samplesPerSecond = 9342.4 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.45362248 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0657s; samplesPerSecond = 9736.4 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.41640727 * 640; EvalErrorPrediction = 0.61562500 * 640; time = 0.0668s; samplesPerSecond = 9582.0 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39745480 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0662s; samplesPerSecond = 9662.0 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.16416048 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.0666s; samplesPerSecond = 9614.2 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.30346871 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0666s; samplesPerSecond = 9607.3 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.24398830 * 640; EvalErrorPrediction = 0.60937500 * 640; time = 0.0677s; samplesPerSecond = 9446.8 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15322484 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.0674s; samplesPerSecond = 9489.6 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.21664633 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.0662s; samplesPerSecond = 9667.7 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25246690 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.0669s; samplesPerSecond = 9567.1 +MPI Rank 2: 05/03/2016 14:22:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00000344 * 20480; EvalErrorPrediction = 0.72836914 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.19061s MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:22: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 14:22:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:22: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08151949 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.0918s; samplesPerSecond = 27872.4 -MPI Rank 2: 05/03/2016 14:23:22: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98395686 * 2560; EvalErrorPrediction = 0.54257813 * 2560; time = 0.0755s; samplesPerSecond = 33893.4 -MPI Rank 2: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98575480 * 2560; EvalErrorPrediction = 0.54492188 * 2560; time = 0.0731s; samplesPerSecond = 35041.6 -MPI Rank 2: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.90485033 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0742s; samplesPerSecond = 34503.7 -MPI Rank 2: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.88324139 * 2560; EvalErrorPrediction = 0.52539063 * 2560; time = 0.0728s; samplesPerSecond = 35178.8 -MPI Rank 2: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.89109324 * 2560; EvalErrorPrediction = 0.53359375 * 2560; time = 0.0771s; samplesPerSecond = 33212.2 -MPI Rank 2: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89496253 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.0736s; samplesPerSecond = 34766.1 -MPI Rank 2: 05/03/2016 14:23:23: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.85944298 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.0735s; samplesPerSecond = 34823.8 -MPI Rank 2: 05/03/2016 14:23:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.93560270 * 20480; EvalErrorPrediction = 0.53603516 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.622283s +MPI Rank 2: 05/03/2016 14:22:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08151949 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.0925s; samplesPerSecond = 27660.7 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98395686 * 2560; EvalErrorPrediction = 0.54257813 * 2560; time = 0.0751s; samplesPerSecond = 34091.5 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98575480 * 2560; EvalErrorPrediction = 0.54492188 * 2560; time = 0.0733s; samplesPerSecond = 34935.0 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.90485033 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0744s; samplesPerSecond = 34391.5 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.88324139 * 2560; EvalErrorPrediction = 0.52539063 * 2560; time = 0.0725s; samplesPerSecond = 35307.4 +MPI Rank 2: 05/03/2016 14:22:55: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.89109324 * 2560; EvalErrorPrediction = 0.53359375 * 2560; time = 0.0756s; samplesPerSecond = 33851.2 +MPI Rank 2: 05/03/2016 14:22:55: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89496253 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.0733s; samplesPerSecond = 34939.3 +MPI Rank 2: 05/03/2016 14:22:55: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.85944298 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.0730s; samplesPerSecond = 35079.1 +MPI Rank 2: 05/03/2016 14:22:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.93560270 * 20480; EvalErrorPrediction = 0.53603516 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.619669s MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:22:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:23: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 05/03/2016 14:23:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752854 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.1298s; samplesPerSecond = 78915.5 -MPI Rank 2: 05/03/2016 14:23:23: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358833 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1038s; samplesPerSecond = 98656.0 -MPI Rank 2: 05/03/2016 14:23:23: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055844 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.243277s -MPI Rank 2: 05/03/2016 14:23:23: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 14:22:55: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. 
+MPI Rank 2: 05/03/2016 14:22:55: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752854 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.1334s; samplesPerSecond = 76754.7 +MPI Rank 2: 05/03/2016 14:22:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358833 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1068s; samplesPerSecond = 95889.1 +MPI Rank 2: 05/03/2016 14:22:55: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055844 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.248275s +MPI Rank 2: 05/03/2016 14:22:55: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:23: Action "train" complete. +MPI Rank 2: 05/03/2016 14:22:55: Action "train" complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:23:23: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 14:22:55: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/run-test b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/run-test index de5b2af69..8c6c6032b 100755 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/run-test +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantization/run-test @@ -8,6 +8,12 @@ LogFileName=stderr Instances=3 NumCPUThreads=$(threadsPerInstance $Instances) +(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) +if [ $? != 0 ]; then + echo Error: Baselines must match original test. Copy from $OriginalTestDir. + exit 1 +fi + # cntkmpirun cntkmpirun "-n $Instances" cntk.cntk "speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=$NumCPUThreads" ExitCode=$? 
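Note on the baseline guard added to run-test above: it pipes md5sums of the reference test's baseline files into md5sum -c run from the current test directory, so the test aborts early if the two copies of the shared baselines have drifted apart. A minimal annotated sketch of the same check (not part of the commit), assuming GNU coreutils md5sum and the TEST_DIR / OriginalTestDir variables that the test driver already provides to run-test:

    # Checksum the reference baselines, then verify the local copies against them.
    # (Same guard as in run-test above; quoting added for robustness.)
    (cd "$TEST_DIR/$OriginalTestDir" && md5sum baseline*) \
      | (cd "$TEST_DIR" && md5sum --status -c -)
    if [ $? -ne 0 ]; then
        echo "Error: Baselines must match original test. Copy from $OriginalTestDir."
        exit 1
    fi

    # If the guard trips, refreshing the local baselines is the intended fix, e.g.:
    #   cp "$TEST_DIR/$OriginalTestDir"/baseline* "$TEST_DIR"/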
diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt index 85d8999ca..8dcaf1230 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt @@ -1,60 +1,6 @@ -=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -------------------------------------------------------------------- -Build info: - - Built time: May 3 2016 17:56:15 - Last modified date: Tue May 3 11:36:22 2016 - Build type: release - Build target: GPU - With 1bit-SGD: no - Math lib: acml - CUDA_PATH: /usr/local/cuda-7.5 - CUB_PATH: /usr/local/cub-1.4.1 - CUDNN_PATH: /usr/local/cudnn-4.0 - Build Branch: HEAD - Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 - Built by philly on 18750d26eb32 - Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -------------------------------------------------------------------- -------------------------------------------------------------------- -Build info: - -Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data - Built time: May 3 2016 17:56:15 - Last modified date: Tue May 3 11:36:22 2016 - Build type: release - Build target: GPU - With 1bit-SGD: no - Math lib: acml - CUDA_PATH: /usr/local/cuda-7.5 - CUB_PATH: /usr/local/cub-1.4.1 - CUDNN_PATH: /usr/local/cudnn-4.0 - Build Branch: HEAD - Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 - Built by philly on 18750d26eb32 - Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -------------------------------------------------------------------- +=== Running mpiexec -n 3 /home/mluser/src/cplx_master/build/debug/bin/cntk 
configFile=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../cntk.config currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. DeviceId=-1 numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr MPIWrapper: initializing MPI -Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -------------------------------------------------------------------- -Build info: - - Built time: May 3 2016 17:56:15 - Last modified date: Tue May 3 11:36:22 2016 - Build type: release - Build target: GPU - With 1bit-SGD: no - Math lib: acml - CUDA_PATH: /usr/local/cuda-7.5 - CUB_PATH: /usr/local/cub-1.4.1 - CUDNN_PATH: /usr/local/cudnn-4.0 - Build Branch: HEAD - Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 - Built by philly on 18750d26eb32 - Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -------------------------------------------------------------------- MPIWrapper: initializing MPI -Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPIWrapper: initializing MPI ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other @@ -62,77 +8,45 @@ ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other +ping [requestnodes (after change)]: all 3 nodes responded +mpihelper: we are cog 1 in a gearbox of 3 +ping [mpihelper]: 3 nodes pinging each other +ping [mpihelper]: all 3 nodes responded ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other +ping [mpihelper]: all 3 nodes responded +ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 0 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other -ping 
[requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 1 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded -ping [mpihelper]: all 3 nodes responded -ping [mpihelper]: all 3 nodes responded -05/03/2016 18:17:13: Redirecting stderr to file /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank0 -05/03/2016 18:17:14: Redirecting stderr to file /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank1 -05/03/2016 18:17:14: Redirecting stderr to file /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank2 --------------------------------------------------------------------------- -mpiexec has exited due to process rank 0 with PID 673 on -node 87698aadbc9d exiting improperly. There are three reasons this could occur: - -1. this process did not call "init" before exiting, but others in -the job did. This can cause a job to hang indefinitely while it waits -for all processes to call "init". By rule, if one process calls "init", -then ALL processes must call "init" prior to termination. - -2. this process called "init", but exited without calling "finalize". -By rule, all processes that call "init" MUST call "finalize" prior to -exiting or it will be considered an "abnormal termination" - -3. this process called "MPI_Abort" or "orte_abort" and the mca parameter -orte_create_session_dirs is set to false. In this case, the run-time cannot -detect that the abort call was an abnormal termination. Hence, the only -error message you will receive is this one. - -This may have caused other processes in the application to be -terminated by signals sent by mpiexec (as reported here). - -You can avoid this message by specifying -quiet on the mpiexec command line. 
- --------------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:17:13: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 18:17:13: Build info: +Redirecting stderr to file /tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr_speechTrain.logrank0 +Redirecting stderr to file /tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr_speechTrain.logrank1 +Redirecting stderr to file /tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr_speechTrain.logrank2 +MPI Rank 0: ------------------------------------------------------------------- +MPI Rank 0: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: Built time: May 3 2016 17:56:15 -MPI Rank 0: 05/03/2016 18:17:13: Last modified date: Tue May 3 11:36:22 2016 -MPI Rank 0: 05/03/2016 18:17:13: Build type: release -MPI Rank 0: 05/03/2016 18:17:13: Build target: GPU -MPI Rank 0: 05/03/2016 18:17:13: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 18:17:13: Math lib: acml -MPI Rank 0: 05/03/2016 18:17:13: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 0: 05/03/2016 18:17:13: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 0: 05/03/2016 18:17:13: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 0: 05/03/2016 18:17:13: Build Branch: HEAD -MPI Rank 0: 05/03/2016 18:17:13: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 0: 05/03/2016 18:17:13: Built by philly on 18750d26eb32 -MPI Rank 0: 05/03/2016 18:17:13: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 0: 05/03/2016 18:17:13: ------------------------------------------------------------------- +MPI Rank 0: Built time: Jan 6 2016 19:01:02 +MPI Rank 0: Last modified date: Tue Jan 5 10:37:19 2016 +MPI Rank 0: Build type: debug +MPI Rank 0: Math lib: acml +MPI Rank 0: CUDA_PATH: /usr/local/cuda-7.0 +MPI Rank 0: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 0: Build Branch: master +MPI Rank 0: Build SHA1: f88156c7f48e6418e0e5e2998e159c54aaca3c1d +MPI Rank 0: ------------------------------------------------------------------- +MPI Rank 0: running on localhost at 2016/01/06 23:26:48 +MPI Rank 0: command line: +MPI Rank 0: /home/mluser/src/cplx_master/build/debug/bin/cntk configFile=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../cntk.config currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
DeviceId=-1 numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: Running on localhost at 2016/05/03 18:17:13 -MPI Rank 0: 05/03/2016 18:17:13: Command line: -MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:17:13: precision = "float" +MPI Rank 0: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -170,7 +84,7 @@ MPI Rank 0: CE = if trainingCriterion == 'CE' MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 0: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 0: logPrior = LogPrior(labels) MPI Rank 0: // TODO: how to add a tag to an infix operation? 
@@ -221,34 +135,30 @@ MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu +MPI Rank 0: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. MPI Rank 0: DeviceId=-1 -MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] -MPI Rank 0: numCPUThreads=8 +MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:17:13: precision = "float" +MPI Rank 0: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = -1 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -280,7 +190,7 @@ MPI Rank 0: CE = if trainingCriterion == 'CE' MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI 
Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 0: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 0: logPrior = LogPrior(labels) MPI Rank 0: // TODO: how to add a tag to an infix operation? @@ -324,47 +234,42 @@ MPI Rank 0: type = "real" MPI Rank 0: scpFile = "glob_0000.scp" MPI Rank 0: ] MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 0: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 0: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu +MPI Rank 0: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
MPI Rank 0: DeviceId=-1 -MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] -MPI Rank 0: numCPUThreads=8 +MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 0: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: configparameters: cntk.cntk:deviceId=-1 -MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ +MPI Rank 0: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: configparameters: cntk.config:command=speechTrain +MPI Rank 0: configparameters: cntk.config:ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
+MPI Rank 0: configparameters: cntk.config:currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: configparameters: cntk.config:DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: configparameters: cntk.config:deviceId=-1 +MPI Rank 0: configparameters: cntk.config:numCPUThreads=2 +MPI Rank 0: configparameters: cntk.config:parallelTrain=true +MPI Rank 0: configparameters: cntk.config:precision=double +MPI Rank 0: configparameters: cntk.config:RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu +MPI Rank 0: configparameters: cntk.config:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -396,7 +301,7 @@ MPI Rank 0: CE = if trainingCriterion == 'CE' MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 0: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 0: logPrior = LogPrior(labels) MPI Rank 0: // TODO: how to add a tag to an infix operation? @@ -440,294 +345,558 @@ MPI Rank 0: type = "real" MPI Rank 0: scpFile = "glob_0000.scp" MPI Rank 0: ] MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 0: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 0: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 18:17:13: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 18:17:13: Commands: speechTrain -MPI Rank 0: 05/03/2016 18:17:13: Precision = "double" -MPI Rank 0: 05/03/2016 18:17:13: Using 8 CPU threads. 
-MPI Rank 0: 05/03/2016 18:17:13: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 18:17:13: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 0: 05/03/2016 18:17:13: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: ############################################################################## -MPI Rank 0: 05/03/2016 18:17:13: # # -MPI Rank 0: 05/03/2016 18:17:13: # Action "train" # -MPI Rank 0: 05/03/2016 18:17:13: # # -MPI Rank 0: 05/03/2016 18:17:13: ############################################################################## -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: configparameters: cntk.config:stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr +MPI Rank 0: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: command: speechTrain +MPI Rank 0: precision = double +MPI Rank 0: Using 2 CPU threads +MPI Rank 0: CNTKModelPath: /tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/models/cntkSpeech.dnn +MPI Rank 0: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 0: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 0: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 0: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list -MPI Rank 0: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: Creating virgin network. +MPI Rank 0: reading script file glob_0000.scp ... 948 entries +MPI Rank 0: total 132 state names in state list /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list +MPI Rank 0: htkmlfreader: reading MLF file /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: MPI Rank 0: Post-processing network... MPI Rank 0: MPI Rank 0: 7 roots: -MPI Rank 0: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 0: EvalErrorPrediction = ErrorPrediction() -MPI Rank 0: InvStdOfFeatures = InvStdDev() -MPI Rank 0: MeanOfFeatures = Mean() -MPI Rank 0: PosteriorProb = Softmax() -MPI Rank 0: Prior = Mean() -MPI Rank 0: ScaledLogLikelihood = Minus() -MPI Rank 0: -MPI Rank 0: Validating network. 25 nodes to process in pass 1. 
-MPI Rank 0: -MPI Rank 0: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 0: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 0: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 0: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 0: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 0: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 0: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 0: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 0: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 0: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 0: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 0: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 0: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 0: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 0: Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 0: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 0: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 0: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 0: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 0: -MPI Rank 0: Validating network. 17 nodes to process in pass 2. +MPI Rank 0: MeanOfFeatures = Mean +MPI Rank 0: InvStdOfFeatures = InvStdDev +MPI Rank 0: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: Prior = Mean +MPI Rank 0: ScaledLogLikelihood = Minus +MPI Rank 0: PosteriorProb = Softmax +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for MeanOfFeatures Mean operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for InvStdOfFeatures InvStdDev operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for CrossEntropyWithSoftmax CrossEntropyWithSoftmax operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for EvalErrorPrediction ErrorPrediction operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for Prior Mean operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for ScaledLogLikelihood Minus operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for PosteriorProb Softmax operation MPI Rank 0: MPI Rank 0: -MPI Rank 0: Validating network, final pass. +MPI Rank 0: Validating for node MeanOfFeatures. 2 nodes to process in pass 1. 
+MPI Rank 0: +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node MeanOfFeatures. 1 nodes to process in pass 2. +MPI Rank 0: +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node MeanOfFeatures, final verification. +MPI Rank 0: +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: 1 out of 2 nodes do not share the minibatch layout with the input data. MPI Rank 0: MPI Rank 0: +MPI Rank 0: Validating for node InvStdOfFeatures. 2 nodes to process in pass 1. MPI Rank 0: -MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input data. +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node InvStdOfFeatures. 1 nodes to process in pass 2. +MPI Rank 0: +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node InvStdOfFeatures, final verification. +MPI Rank 0: +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 0: +MPI Rank 0: +MPI Rank 0: Validating for node CrossEntropyWithSoftmax. 20 nodes to process in pass 1. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node CrossEntropyWithSoftmax. 10 nodes to process in pass 2. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node CrossEntropyWithSoftmax, final verification. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 0: +MPI Rank 0: +MPI Rank 0: Validating for node EvalErrorPrediction. 20 nodes to process in pass 1. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node EvalErrorPrediction. 9 nodes to process in pass 2. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node EvalErrorPrediction, final verification. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 0: +MPI Rank 0: +MPI Rank 0: Validating for node Prior. 2 nodes to process in pass 1. +MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node Prior. 1 nodes to process in pass 2. +MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node Prior, final verification. +MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: +MPI Rank 0: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 0: +MPI Rank 0: +MPI Rank 0: Validating for node ScaledLogLikelihood. 22 nodes to process in pass 1. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: Validating for node ScaledLogLikelihood. 10 nodes to process in pass 2. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: Validating for node ScaledLogLikelihood, final verification. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: 10 out of 22 nodes do not share the minibatch layout with the input data. +MPI Rank 0: +MPI Rank 0: +MPI Rank 0: Validating for node PosteriorProb. 19 nodes to process in pass 1. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: Validating for node PosteriorProb. 9 nodes to process in pass 2. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: Validating for node PosteriorProb, final verification. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: 8 out of 19 nodes do not share the minibatch layout with the input data. MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: Created model with 25 nodes on CPU. +MPI Rank 0: SGD using CPU. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: Training criterion node(s): -MPI Rank 0: 05/03/2016 18:17:13: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: Training criterion node(s): +MPI Rank 0: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: Evaluation criterion node(s): -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: Evaluation criterion node(s): +MPI Rank 0: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. MPI Rank 0: -MPI Rank 0: Memory Sharing Structure: +MPI Rank 0: Precomputing --> 3 PreCompute nodes found. 
MPI Rank 0: -MPI Rank 0: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0x1303ea8: {[W0 Value[512 x 363]] } -MPI Rank 0: 0x13042e8: {[W2 Value[132 x 512]] } -MPI Rank 0: 0x1305c98: {[B0 Value[512 x 1]] } -MPI Rank 0: 0x130e9c8: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0x130eb88: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0x1313e78: {[B1 Value[512 x 1]] } -MPI Rank 0: 0x13275a8: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0x132cd08: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0x133d3e8: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 0x133ec38: {[W1 Value[512 x 512]] } -MPI Rank 0: 0x13a3b78: {[Prior Value[132]] } -MPI Rank 0: 0x13c2218: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0x13c23d8: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0x13d1bc8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0x13d1d88: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 0x13d1f48: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0x13d33d8: {[LogOfPrior Value[132]] } -MPI Rank 0: 0x13f1668: {[features Value[363 x *]] } -MPI Rank 0: 0x1431b18: {[labels Value[132 x *]] } -MPI Rank 0: 0x1433cd8: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0x1433e98: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 0x1439e48: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0x1439fa8: {[W0*features Value[512 x *]] } -MPI Rank 0: 0x143a108: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0x143a268: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0x143c148: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0x143c2a8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0x143d428: {[B2 Value[132 x 1]] } +MPI Rank 0: NodeName: InvStdOfFeatures +MPI Rank 0: NodeName: MeanOfFeatures +MPI Rank 0: NodeName: Prior +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: +MPI Rank 0: Precomputing --> Completed. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: Set Max Temp Mem Size For Convolution Nodes to 0 samples. +MPI Rank 0: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:13: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 18:17:13: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 18:17:13: Prior = Mean() +MPI Rank 0: Starting minibatch loop. 
+MPI Rank 0: #PLUS# +MPI Rank 0: Tensor Op: Op 15: 512 x 64 {1,512} op 512 x 1 {1,512} -> 512 x 64 {1,512} +MPI Rank 0: #NLop5# +MPI Rank 0: Tensor Op: Op 5: 512 x 64 {1,512} -> 512 x 64 {1,512} +MPI Rank 0: #PLUSBP# +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: * 640; CrossEntropyWithSoftmax = 4.36628272; EvalErrorPrediction = 0.90937500; TotalTime = 1.2313s; SamplesPerSecond = 519.8 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: * 640; CrossEntropyWithSoftmax = 4.15914991; EvalErrorPrediction = 0.89218750; TotalTime = 0.7968s; SamplesPerSecond = 803.2 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: * 640; CrossEntropyWithSoftmax = 3.99837967; EvalErrorPrediction = 0.86875000; TotalTime = 0.9684s; SamplesPerSecond = 660.9 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: * 640; CrossEntropyWithSoftmax = 3.86616341; EvalErrorPrediction = 0.86250000; TotalTime = 0.9333s; SamplesPerSecond = 685.7 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: * 640; CrossEntropyWithSoftmax = 3.80082643; EvalErrorPrediction = 0.87968750; TotalTime = 1.0671s; SamplesPerSecond = 599.7 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: * 640; CrossEntropyWithSoftmax = 3.73336112; EvalErrorPrediction = 0.87812500; TotalTime = 1.1067s; SamplesPerSecond = 578.3 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: * 640; CrossEntropyWithSoftmax = 3.57119384; EvalErrorPrediction = 0.82031250; TotalTime = 1.1035s; SamplesPerSecond = 580.0 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: * 640; CrossEntropyWithSoftmax = 3.44001005; EvalErrorPrediction = 0.81562500; TotalTime = 1.0778s; SamplesPerSecond = 593.8 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: * 640; CrossEntropyWithSoftmax = 3.36131109; EvalErrorPrediction = 0.77343750; TotalTime = 1.0580s; SamplesPerSecond = 604.9 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: * 640; CrossEntropyWithSoftmax = 3.39817487; EvalErrorPrediction = 0.85000000; TotalTime = 0.8092s; SamplesPerSecond = 791.0 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: * 640; CrossEntropyWithSoftmax = 3.25116276; EvalErrorPrediction = 0.77031250; TotalTime = 0.8000s; SamplesPerSecond = 800.0 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: * 640; CrossEntropyWithSoftmax = 3.35774005; EvalErrorPrediction = 0.79843750; TotalTime = 0.9890s; SamplesPerSecond = 647.1 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: * 640; CrossEntropyWithSoftmax = 3.19791351; EvalErrorPrediction = 0.76406250; TotalTime = 1.0585s; SamplesPerSecond = 604.6 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: * 640; CrossEntropyWithSoftmax = 3.06449990; EvalErrorPrediction = 0.71718750; TotalTime = 1.0774s; SamplesPerSecond = 594.0 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: * 640; CrossEntropyWithSoftmax = 3.05357361; EvalErrorPrediction = 0.74218750; TotalTime = 1.1220s; SamplesPerSecond = 570.4 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: * 640; CrossEntropyWithSoftmax = 3.02144079; EvalErrorPrediction = 0.74531250; TotalTime = 1.0985s; SamplesPerSecond = 582.6 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: * 640; CrossEntropyWithSoftmax = 2.89890004; EvalErrorPrediction = 0.69687500; TotalTime = 1.0918s; SamplesPerSecond = 586.2 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: * 640; CrossEntropyWithSoftmax = 2.74598358; EvalErrorPrediction = 0.68593750; TotalTime = 1.0858s; SamplesPerSecond = 589.4 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: * 640; 
CrossEntropyWithSoftmax = 2.83604141; EvalErrorPrediction = 0.70625000; TotalTime = 1.0809s; SamplesPerSecond = 592.1 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: * 640; CrossEntropyWithSoftmax = 2.62522562; EvalErrorPrediction = 0.64687500; TotalTime = 1.1356s; SamplesPerSecond = 563.6 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: * 640; CrossEntropyWithSoftmax = 2.65507979; EvalErrorPrediction = 0.66562500; TotalTime = 0.9394s; SamplesPerSecond = 681.3 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: * 640; CrossEntropyWithSoftmax = 2.59593989; EvalErrorPrediction = 0.65937500; TotalTime = 0.8181s; SamplesPerSecond = 782.3 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: * 640; CrossEntropyWithSoftmax = 2.51177605; EvalErrorPrediction = 0.62343750; TotalTime = 0.8172s; SamplesPerSecond = 783.1 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: * 640; CrossEntropyWithSoftmax = 2.42438840; EvalErrorPrediction = 0.63281250; TotalTime = 1.0088s; SamplesPerSecond = 634.4 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: * 640; CrossEntropyWithSoftmax = 2.40372959; EvalErrorPrediction = 0.65156250; TotalTime = 1.0543s; SamplesPerSecond = 607.0 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: * 640; CrossEntropyWithSoftmax = 2.48277420; EvalErrorPrediction = 0.63906250; TotalTime = 1.0675s; SamplesPerSecond = 599.5 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: * 640; CrossEntropyWithSoftmax = 2.34181483; EvalErrorPrediction = 0.61718750; TotalTime = 1.1324s; SamplesPerSecond = 565.2 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: * 640; CrossEntropyWithSoftmax = 2.22951559; EvalErrorPrediction = 0.57656250; TotalTime = 1.1390s; SamplesPerSecond = 561.9 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: * 640; CrossEntropyWithSoftmax = 2.32715885; EvalErrorPrediction = 0.62031250; TotalTime = 1.1258s; SamplesPerSecond = 568.5 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: * 640; CrossEntropyWithSoftmax = 2.21143816; EvalErrorPrediction = 0.61406250; TotalTime = 1.1357s; SamplesPerSecond = 563.5 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: * 640; CrossEntropyWithSoftmax = 2.29118500; EvalErrorPrediction = 0.60156250; TotalTime = 1.1347s; SamplesPerSecond = 564.0 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: * 640; CrossEntropyWithSoftmax = 2.19155470; EvalErrorPrediction = 0.56406250; TotalTime = 1.1246s; SamplesPerSecond = 569.1 +MPI Rank 0: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779; EvalErrorPrediction = 0.7277832; learningRatePerSample = 0.015625; EpochTime=33.1964 +MPI Rank 0: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:19: Precomputing --> Completed. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:21: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:21: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 18:17:21: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.5308s; samplesPerSecond = 1205.7 -MPI Rank 0: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.3306s; samplesPerSecond = 1935.8 -MPI Rank 0: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.3693s; samplesPerSecond = 1732.9 -MPI Rank 0: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.5229s; samplesPerSecond = 1223.9 -MPI Rank 0: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.3290s; samplesPerSecond = 1945.6 -MPI Rank 0: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3342s; samplesPerSecond = 1915.0 -MPI Rank 0: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.3390s; samplesPerSecond = 1887.8 -MPI Rank 0: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.3708s; samplesPerSecond = 1725.9 -MPI Rank 0: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3804s; samplesPerSecond = 1682.4 -MPI Rank 0: 05/03/2016 18:17:25: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.5189s; samplesPerSecond = 1233.5 -MPI Rank 0: 05/03/2016 18:17:25: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.3404s; samplesPerSecond = 1880.3 -MPI Rank 0: 05/03/2016 18:17:26: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3362s; samplesPerSecond = 1903.4 -MPI Rank 0: 05/03/2016 18:17:26: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.3700s; samplesPerSecond = 1729.7 -MPI Rank 0: 05/03/2016 18:17:26: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.3663s; samplesPerSecond = 1747.0 -MPI Rank 0: 05/03/2016 18:17:27: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.3595s; samplesPerSecond = 1780.0 -MPI Rank 0: 05/03/2016 18:17:27: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.5480s; samplesPerSecond = 1167.9 -MPI Rank 0: 05/03/2016 18:17:27: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.3444s; samplesPerSecond = 1858.4 -MPI Rank 0: 05/03/2016 18:17:28: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3179s; samplesPerSecond = 2013.5
-MPI Rank 0: 05/03/2016 18:17:28: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.3751s; samplesPerSecond = 1706.0
-MPI Rank 0: 05/03/2016 18:17:29: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.3640s; samplesPerSecond = 1758.3
-MPI Rank 0: 05/03/2016 18:17:29: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.3465s; samplesPerSecond = 1847.2
-MPI Rank 0: 05/03/2016 18:17:29: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.5191s; samplesPerSecond = 1232.9
-MPI Rank 0: 05/03/2016 18:17:30: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.3584s; samplesPerSecond = 1785.6
-MPI Rank 0: 05/03/2016 18:17:30: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.3573s; samplesPerSecond = 1791.2
-MPI Rank 0: 05/03/2016 18:17:30: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.3511s; samplesPerSecond = 1822.7
-MPI Rank 0: 05/03/2016 18:17:31: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.3486s; samplesPerSecond = 1836.1
-MPI Rank 0: 05/03/2016 18:17:31: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.5554s; samplesPerSecond = 1152.4
-MPI Rank 0: 05/03/2016 18:17:32: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.4501s; samplesPerSecond = 1421.9
-MPI Rank 0: 05/03/2016 18:17:32: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3540s; samplesPerSecond = 1807.7
-MPI Rank 0: 05/03/2016 18:17:33: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3297s; samplesPerSecond = 1941.1
-MPI Rank 0: 05/03/2016 18:17:33: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.3610s; samplesPerSecond = 1773.0
-MPI Rank 0: 05/03/2016 18:17:33: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.3651s; samplesPerSecond = 1753.0
-MPI Rank 0: 05/03/2016 18:17:33: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=12.5019s
-MPI Rank 0: 05/03/2016 18:17:33: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.1'
-MPI Rank 0: 
-MPI Rank 0: 05/03/2016 18:17:33: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples
-MPI Rank 0: 
-MPI Rank 0: 05/03/2016 18:17:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
-MPI Rank 0: Actual gradient aggregation time: 0.029402
-MPI Rank 0: Async gradient aggregation wait time: 0.071179
-MPI Rank 0: Actual gradient aggregation time: 0.017354
-MPI Rank 0: 05/03/2016 18:17:34: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09514596 * 2304; EvalErrorPrediction = 0.55989583 * 2304; time = 0.9286s; samplesPerSecond = 2481.1
-MPI Rank 0: Async gradient aggregation wait time: 0.015812
-MPI Rank 0: Actual gradient aggregation time: 0.019064
-MPI Rank 0: Async gradient aggregation wait time: 0.014546
-MPI Rank 0: Actual gradient aggregation time: 0.030847
-MPI Rank 0: 05/03/2016 18:17:35: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.14762552 * 2560; EvalErrorPrediction = 0.58242187 * 2560; time = 0.7629s; samplesPerSecond = 3355.7
-MPI Rank 0: Async gradient aggregation wait time: 1e-05
-MPI Rank 0: Actual gradient aggregation time: 0.017703
-MPI Rank 0: Async gradient aggregation wait time: 1e-05
-MPI Rank 0: Actual gradient aggregation time: 0.020986
-MPI Rank 0: 05/03/2016 18:17:36: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.19977785 * 2560; EvalErrorPrediction = 0.58867187 * 2560; time = 0.9203s; samplesPerSecond = 2781.7
-MPI Rank 0: Async gradient aggregation wait time: 9e-06
-MPI Rank 0: Actual gradient aggregation time: 0.018731
-MPI Rank 0: Async gradient aggregation wait time: 9e-06
-MPI Rank 0: Actual gradient aggregation time: 0.01109
-MPI Rank 0: 05/03/2016 18:17:37: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.13471172 * 2560; EvalErrorPrediction = 0.59023437 * 2560; time = 0.6527s; samplesPerSecond = 3921.9
+MPI Rank 0: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: Actual gradient aggregation time: 0.017002
 MPI Rank 0: Async gradient aggregation wait time: 1.2e-05
-MPI Rank 0: Actual gradient aggregation time: 0.020241
-MPI Rank 0: Async gradient aggregation wait time: 0.101386
-MPI Rank 0: Actual gradient aggregation time: 0.00597
-MPI Rank 0: 05/03/2016 18:17:37: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.07369296 * 2560; EvalErrorPrediction = 0.57382813 * 2560; time = 0.8482s; samplesPerSecond = 3018.0
-MPI Rank 0: Async gradient aggregation wait time: 1.1e-05
-MPI Rank 0: Actual gradient aggregation time: 0.011477
-MPI Rank 0: Async gradient aggregation wait time: 0.040599
-MPI Rank 0: Actual gradient aggregation time: 0.264378
-MPI Rank 0: 05/03/2016 18:17:38: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.14944464 * 2560; EvalErrorPrediction = 0.57578125 * 2560; time = 1.0650s; samplesPerSecond = 2403.8
-MPI Rank 0: Async gradient aggregation wait time: 1e-05
-MPI Rank 0: Actual gradient aggregation time: 0.009145
-MPI Rank 0: Async gradient aggregation wait time: 9e-06
-MPI Rank 0: Actual gradient aggregation time: 0.050316
-MPI Rank 0: 05/03/2016 18:17:39: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.09921664 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.7399s; samplesPerSecond = 3460.0
-MPI Rank 0: Async gradient aggregation wait time: 8e-06
-MPI Rank 0: Actual gradient aggregation time: 0.014008
-MPI Rank 0: Async gradient aggregation wait time: 1e-05
-MPI Rank 0: Actual gradient aggregation time: 0.010712
-MPI Rank 0: 05/03/2016 18:17:40: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.04462189 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.6413s; samplesPerSecond = 3992.0
-MPI Rank 0: Async gradient aggregation wait time: 0.056019
-MPI Rank 0: Actual gradient aggregation time: 0.021519
-MPI Rank 0: 05/03/2016 18:17:40: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.11636713 * 20480; EvalErrorPrediction = 0.57500000 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.71428s
-MPI Rank 0: 05/03/2016 18:17:40: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.2'
-MPI Rank 0: 
-MPI Rank 0: 05/03/2016 18:17:40: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples
-MPI Rank 0: 
-MPI Rank 0: 05/03/2016 18:17:40: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: Actual gradient aggregation time: 0.005501
+MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: * 2304; CrossEntropyWithSoftmax = 2.09514596; EvalErrorPrediction = 0.55989583; TotalTime = 1.3414s; SamplesPerSecond = 1717.6
+MPI Rank 0: Async gradient aggregation wait time: 1.3e-05
+MPI Rank 0: Actual gradient aggregation time: 0.006026
+MPI Rank 0: Async gradient aggregation wait time: 1.4e-05
+MPI Rank 0: Actual gradient aggregation time: 0.005781
+MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: * 2560; CrossEntropyWithSoftmax = 2.14762552; EvalErrorPrediction = 0.58242187; TotalTime = 1.3654s; SamplesPerSecond = 1874.9
+MPI Rank 0: Async gradient aggregation wait time: 1.4e-05
+MPI Rank 0: Actual gradient aggregation time: 0.006045
+MPI Rank 0: Async gradient aggregation wait time: 1.4e-05
+MPI Rank 0: Actual gradient aggregation time: 0.006305
+MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: * 2560; CrossEntropyWithSoftmax = 2.19977785; EvalErrorPrediction = 0.58867187; TotalTime = 1.3787s; SamplesPerSecond = 1856.8
 MPI Rank 0: Async gradient aggregation wait time: 1.2e-05
-MPI Rank 0: Actual gradient aggregation time: 0.009779
+MPI Rank 0: Actual gradient aggregation time: 0.005319
 MPI Rank 0: Async gradient aggregation wait time: 1.2e-05
-MPI Rank 0: Actual gradient aggregation time: 0.014794
-MPI Rank 0: 05/03/2016 18:17:42: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.00619565 * 9216; EvalErrorPrediction = 0.55088976 * 9216; time = 2.0209s; samplesPerSecond = 4560.3
-MPI Rank 0: Async gradient aggregation wait time: 1.1e-05
-MPI Rank 0: Actual gradient aggregation time: 0.006482
-MPI Rank 0: Async gradient aggregation wait time: 1e-05
-MPI Rank 0: Actual gradient aggregation time: 0.050314
-MPI Rank 0: 05/03/2016 18:17:44: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93824509 * 10240; EvalErrorPrediction = 0.53398437 * 10240; time = 2.3145s; samplesPerSecond = 4424.4
-MPI Rank 0: 05/03/2016 18:17:45: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.97096281 * 20480; EvalErrorPrediction = 0.54194336 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.44623s
-MPI Rank 0: 05/03/2016 18:17:45: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.3'
-MPI Rank 0: 
-MPI Rank 0: 05/03/2016 18:17:45: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples
-MPI Rank 0: 
-MPI Rank 0: 05/03/2016 18:17:45: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: Actual gradient aggregation time: 0.005503
+MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: * 2560; CrossEntropyWithSoftmax = 2.13471172; EvalErrorPrediction = 0.59023437; TotalTime = 1.3831s; SamplesPerSecond = 1851.0
 MPI Rank 0: Async gradient aggregation wait time: 1.2e-05
-MPI Rank 0: Actual gradient aggregation time: 0.024841
-MPI Rank 0: Async gradient aggregation wait time: 0.073388
-MPI Rank 0: Actual gradient aggregation time: 0.071822
-MPI Rank 0: 05/03/2016 18:17:46: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91072859 * 9216; EvalErrorPrediction = 0.52365451 * 9216; time = 1.7940s; samplesPerSecond = 5137.2
-MPI Rank 0: Async gradient aggregation wait time: 1.1e-05
-MPI Rank 0: Actual gradient aggregation time: 0.009846
-MPI Rank 0: Async gradient aggregation wait time: 1.1e-05
-MPI Rank 0: Actual gradient aggregation time: 0.144756
-MPI Rank 0: 05/03/2016 18:17:48: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89799241 * 10240; EvalErrorPrediction = 0.52294922 * 10240; time = 1.8320s; samplesPerSecond = 5589.5
-MPI Rank 0: Async gradient aggregation wait time: 0.013612
-MPI Rank 0: 05/03/2016 18:17:48: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.90474356 * 20480; EvalErrorPrediction = 0.52290039 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=3.6891s
-MPI Rank 0: 05/03/2016 18:17:48: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn'
-MPI Rank 0: 05/03/2016 18:17:48: CNTKCommandTrainEnd: speechTrain
+MPI Rank 0: Actual gradient aggregation time: 0.005297
+MPI Rank 0: Async gradient aggregation wait time: 1.4e-05
+MPI Rank 0: Actual gradient aggregation time: 0.005518
+MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: * 2560; CrossEntropyWithSoftmax = 2.07369296; EvalErrorPrediction = 0.57382813; TotalTime = 1.3683s; SamplesPerSecond = 1871.0
+MPI Rank 0: Async gradient aggregation wait time: 1.4e-05
+MPI Rank 0: Actual gradient aggregation time: 0.005775
+MPI Rank 0: Async gradient aggregation wait time: 1.3e-05
+MPI Rank 0: Actual gradient aggregation time: 0.006074
+MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: * 2560; CrossEntropyWithSoftmax = 2.14944464; EvalErrorPrediction = 0.57578125; TotalTime = 1.3681s; SamplesPerSecond = 1871.3
+MPI Rank 0: Async gradient aggregation wait time: 1.4e-05
+MPI Rank 0: Actual gradient aggregation time: 0.00529
+MPI Rank 0: Async gradient aggregation wait time: 1.4e-05
+MPI Rank 0: Actual gradient aggregation time: 0.006175
+MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: * 2560; CrossEntropyWithSoftmax = 2.09921664; EvalErrorPrediction = 0.56484375; TotalTime = 1.3474s; SamplesPerSecond = 1899.9
+MPI Rank 0: Async gradient aggregation wait time: 1.3e-05
+MPI Rank 0: Actual gradient aggregation time: 0.005733
+MPI Rank 0: Async gradient aggregation wait time: 1.4e-05
+MPI Rank 0: Actual gradient aggregation time: 0.006003
+MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: * 2560; CrossEntropyWithSoftmax = 2.04462189; EvalErrorPrediction = 0.56484375; TotalTime = 1.3342s; SamplesPerSecond = 1918.8
+MPI Rank 0: Async gradient aggregation wait time: 1.3e-05
+MPI Rank 0: Actual gradient aggregation time: 0.006173
+MPI Rank 0: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.11636713; EvalErrorPrediction = 0.575; learningRatePerSample = 0.001953125; EpochTime=10.9568
+MPI Rank 0: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples
+MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 05/03/2016 18:17:48: Action "train" complete.
+MPI Rank 0: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: Async gradient aggregation wait time: 1.5e-05
+MPI Rank 0: Actual gradient aggregation time: 0.005437
+MPI Rank 0: Async gradient aggregation wait time: 1.5e-05
+MPI Rank 0: Actual gradient aggregation time: 0.005918
+MPI Rank 0: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 2.00619565; EvalErrorPrediction = 0.55088976; TotalTime = 3.3670s; SamplesPerSecond = 2737.1
+MPI Rank 0: Async gradient aggregation wait time: 1.4e-05
+MPI Rank 0: Actual gradient aggregation time: 0.006134
+MPI Rank 0: Async gradient aggregation wait time: 1.5e-05
+MPI Rank 0: Actual gradient aggregation time: 0.005916
+MPI Rank 0: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.93824509; EvalErrorPrediction = 0.53398437; TotalTime = 3.3488s; SamplesPerSecond = 3057.8
+MPI Rank 0: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.97096281; EvalErrorPrediction = 0.54194336; learningRatePerSample = 9.7656251e-05; EpochTime=6.80837
+MPI Rank 0: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples
+MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 05/03/2016 18:17:48: __COMPLETED__
-MPI Rank 1: 05/03/2016 18:17:14: -------------------------------------------------------------------
-MPI Rank 1: 05/03/2016 18:17:14: Build info: 
+MPI Rank 0: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: Async gradient aggregation wait time: 1.5e-05
+MPI Rank 0: Actual gradient aggregation time: 0.022555
+MPI Rank 0: Async gradient aggregation wait time: 1.5e-05
+MPI Rank 0: Actual gradient aggregation time: 0.040141
+MPI Rank 0: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 1.91072859; EvalErrorPrediction = 0.52365451; TotalTime = 3.3219s; SamplesPerSecond = 2774.3
+MPI Rank 0: Async gradient aggregation wait time: 1.5e-05
+MPI Rank 0: Actual gradient aggregation time: 0.143767
+MPI Rank 0: Async gradient aggregation wait time: 1.5e-05
+MPI Rank 0: Actual gradient aggregation time: 0.26264
+MPI Rank 0: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.89799241; EvalErrorPrediction = 0.52294922; TotalTime = 3.2030s; SamplesPerSecond = 3197.0
+MPI Rank 0: Async gradient aggregation wait time: 0.006704
+MPI Rank 0: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.90474356; EvalErrorPrediction = 0.52290039; learningRatePerSample = 9.7656251e-05; EpochTime=6.84798
+MPI Rank 0: CNTKCommandTrainEnd: speechTrain
+MPI Rank 0: __COMPLETED__
+MPI Rank 0: ~MPIWrapper
+MPI Rank 1: -------------------------------------------------------------------
+MPI Rank 1: Build info: 
 MPI Rank 1: 
-MPI Rank 1: 05/03/2016 18:17:14: Built time: May 3 2016 17:56:15
-MPI Rank 1: 05/03/2016 18:17:14: Last modified date: Tue May 3 11:36:22 2016
-MPI Rank 1: 05/03/2016 18:17:14: Build type: release
-MPI Rank 1: 05/03/2016 18:17:14: Build target: GPU
-MPI Rank 1: 05/03/2016 18:17:14: With 1bit-SGD: no
-MPI Rank 1: 05/03/2016 18:17:14: Math lib: acml
-MPI Rank 1: 05/03/2016 18:17:14: CUDA_PATH: /usr/local/cuda-7.5
-MPI Rank 1: 05/03/2016 18:17:14: CUB_PATH: /usr/local/cub-1.4.1
-MPI Rank 1: 05/03/2016 18:17:14: CUDNN_PATH: /usr/local/cudnn-4.0
-MPI Rank 1: 05/03/2016 18:17:14: Build Branch: HEAD
-MPI Rank 1: 05/03/2016 18:17:14: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82
-MPI Rank 1: 05/03/2016 18:17:14: Built by philly on 18750d26eb32
-MPI Rank 1: 05/03/2016 18:17:14: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-MPI Rank 1: 05/03/2016 18:17:14: -------------------------------------------------------------------
+MPI Rank 1: Built time: Jan 6 2016 19:01:02
+MPI Rank 1: Last modified date: Tue Jan 5 10:37:19 2016
+MPI Rank 1: Build type: debug
+MPI Rank 1: Math lib: acml
+MPI Rank 1: CUDA_PATH: /usr/local/cuda-7.0
+MPI Rank 1: CUB_PATH: /usr/local/cub-1.4.1
+MPI Rank 1: Build Branch: master
+MPI Rank 1: Build SHA1: f88156c7f48e6418e0e5e2998e159c54aaca3c1d
+MPI Rank 1: -------------------------------------------------------------------
+MPI Rank 1: running on localhost at 2016/01/06 23:26:48
+MPI Rank 1: command line: 
+MPI Rank 1: /home/mluser/src/cplx_master/build/debug/bin/cntk configFile=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../cntk.config currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/..
DeviceId=-1 numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: Running on localhost at 2016/05/03 18:17:14 -MPI Rank 1: 05/03/2016 18:17:14: Command line: -MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:17:14: precision = "float" +MPI Rank 1: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -765,7 +934,7 @@ MPI Rank 1: CE = if trainingCriterion == 'CE' MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 1: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 1: logPrior = LogPrior(labels) MPI Rank 1: // TODO: how to add a tag to an infix operation? 
@@ -816,34 +985,30 @@ MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu +MPI Rank 1: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. MPI Rank 1: DeviceId=-1 -MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] -MPI Rank 1: numCPUThreads=8 +MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:17:14: precision = "float" +MPI Rank 1: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = -1 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -875,7 +1040,7 @@ MPI Rank 1: CE = if trainingCriterion == 'CE' MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI 
Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 1: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 1: logPrior = LogPrior(labels) MPI Rank 1: // TODO: how to add a tag to an infix operation? @@ -919,47 +1084,42 @@ MPI Rank 1: type = "real" MPI Rank 1: scpFile = "glob_0000.scp" MPI Rank 1: ] MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 1: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 1: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu +MPI Rank 1: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
MPI Rank 1: DeviceId=-1 -MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] -MPI Rank 1: numCPUThreads=8 +MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 1: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: configparameters: cntk.cntk:deviceId=-1 -MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ +MPI Rank 1: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: configparameters: cntk.config:command=speechTrain +MPI Rank 1: configparameters: cntk.config:ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
+MPI Rank 1: configparameters: cntk.config:currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: configparameters: cntk.config:DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: configparameters: cntk.config:deviceId=-1 +MPI Rank 1: configparameters: cntk.config:numCPUThreads=2 +MPI Rank 1: configparameters: cntk.config:parallelTrain=true +MPI Rank 1: configparameters: cntk.config:precision=double +MPI Rank 1: configparameters: cntk.config:RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu +MPI Rank 1: configparameters: cntk.config:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -991,7 +1151,7 @@ MPI Rank 1: CE = if trainingCriterion == 'CE' MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 1: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 1: logPrior = LogPrior(labels) MPI Rank 1: // TODO: how to add a tag to an infix operation? @@ -1035,290 +1195,558 @@ MPI Rank 1: type = "real" MPI Rank 1: scpFile = "glob_0000.scp" MPI Rank 1: ] MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 1: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 1: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 18:17:14: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 18:17:14: Commands: speechTrain -MPI Rank 1: 05/03/2016 18:17:14: Precision = "double" -MPI Rank 1: 05/03/2016 18:17:14: Using 8 CPU threads. 
-MPI Rank 1: 05/03/2016 18:17:14: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 18:17:14: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 1: 05/03/2016 18:17:14: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: ############################################################################## -MPI Rank 1: 05/03/2016 18:17:14: # # -MPI Rank 1: 05/03/2016 18:17:14: # Action "train" # -MPI Rank 1: 05/03/2016 18:17:14: # # -MPI Rank 1: 05/03/2016 18:17:14: ############################################################################## -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: configparameters: cntk.config:stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr +MPI Rank 1: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: command: speechTrain +MPI Rank 1: precision = double +MPI Rank 1: Using 2 CPU threads +MPI Rank 1: CNTKModelPath: /tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/models/cntkSpeech.dnn +MPI Rank 1: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 1: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 1: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 1: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list -MPI Rank 1: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: Creating virgin network. +MPI Rank 1: reading script file glob_0000.scp ... 948 entries +MPI Rank 1: total 132 state names in state list /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list +MPI Rank 1: htkmlfreader: reading MLF file /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: MPI Rank 1: Post-processing network... MPI Rank 1: MPI Rank 1: 7 roots: -MPI Rank 1: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 1: EvalErrorPrediction = ErrorPrediction() -MPI Rank 1: InvStdOfFeatures = InvStdDev() -MPI Rank 1: MeanOfFeatures = Mean() -MPI Rank 1: PosteriorProb = Softmax() -MPI Rank 1: Prior = Mean() -MPI Rank 1: ScaledLogLikelihood = Minus() -MPI Rank 1: -MPI Rank 1: Validating network. 25 nodes to process in pass 1. 
-MPI Rank 1: -MPI Rank 1: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 1: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 1: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 1: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 1: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 1: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 1: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 1: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 1: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 1: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 1: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 1: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 1: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 1: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 1: Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 1: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 1: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 1: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 1: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 1: -MPI Rank 1: Validating network. 17 nodes to process in pass 2. +MPI Rank 1: MeanOfFeatures = Mean +MPI Rank 1: InvStdOfFeatures = InvStdDev +MPI Rank 1: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: Prior = Mean +MPI Rank 1: ScaledLogLikelihood = Minus +MPI Rank 1: PosteriorProb = Softmax +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for MeanOfFeatures Mean operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for InvStdOfFeatures InvStdDev operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for CrossEntropyWithSoftmax CrossEntropyWithSoftmax operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for EvalErrorPrediction ErrorPrediction operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for Prior Mean operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for ScaledLogLikelihood Minus operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for PosteriorProb Softmax operation MPI Rank 1: MPI Rank 1: -MPI Rank 1: Validating network, final pass. +MPI Rank 1: Validating for node MeanOfFeatures. 2 nodes to process in pass 1. 
+MPI Rank 1: +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node MeanOfFeatures. 1 nodes to process in pass 2. +MPI Rank 1: +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node MeanOfFeatures, final verification. +MPI Rank 1: +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: 1 out of 2 nodes do not share the minibatch layout with the input data. MPI Rank 1: MPI Rank 1: +MPI Rank 1: Validating for node InvStdOfFeatures. 2 nodes to process in pass 1. MPI Rank 1: -MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input data. +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node InvStdOfFeatures. 1 nodes to process in pass 2. +MPI Rank 1: +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node InvStdOfFeatures, final verification. +MPI Rank 1: +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 1: +MPI Rank 1: +MPI Rank 1: Validating for node CrossEntropyWithSoftmax. 20 nodes to process in pass 1. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node CrossEntropyWithSoftmax. 10 nodes to process in pass 2. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node CrossEntropyWithSoftmax, final verification. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 1: +MPI Rank 1: +MPI Rank 1: Validating for node EvalErrorPrediction. 20 nodes to process in pass 1. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node EvalErrorPrediction. 9 nodes to process in pass 2. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node EvalErrorPrediction, final verification. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 1: +MPI Rank 1: +MPI Rank 1: Validating for node Prior. 2 nodes to process in pass 1. +MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node Prior. 1 nodes to process in pass 2. +MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node Prior, final verification. +MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: +MPI Rank 1: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 1: +MPI Rank 1: +MPI Rank 1: Validating for node ScaledLogLikelihood. 22 nodes to process in pass 1. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: Validating for node ScaledLogLikelihood. 10 nodes to process in pass 2. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: Validating for node ScaledLogLikelihood, final verification. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: 10 out of 22 nodes do not share the minibatch layout with the input data. +MPI Rank 1: +MPI Rank 1: +MPI Rank 1: Validating for node PosteriorProb. 19 nodes to process in pass 1. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: Validating for node PosteriorProb. 9 nodes to process in pass 2. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: Validating for node PosteriorProb, final verification. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: 8 out of 19 nodes do not share the minibatch layout with the input data. MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: Created model with 25 nodes on CPU. +MPI Rank 1: SGD using CPU. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: Training criterion node(s): -MPI Rank 1: 05/03/2016 18:17:14: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: Training criterion node(s): +MPI Rank 1: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: Evaluation criterion node(s): -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: Evaluation criterion node(s): +MPI Rank 1: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. MPI Rank 1: -MPI Rank 1: Memory Sharing Structure: +MPI Rank 1: Precomputing --> 3 PreCompute nodes found. 
MPI Rank 1: -MPI Rank 1: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 0x10f2e28: {[B0 Value[512 x 1]] } -MPI Rank 1: 0x10f4cd8: {[labels Value[132 x *]] } -MPI Rank 1: 0x10f58b8: {[B2 Value[132 x 1]] } -MPI Rank 1: 0x10f7088: {[features Value[363 x *]] } -MPI Rank 1: 0x1100c28: {[LogOfPrior Value[132]] } -MPI Rank 1: 0x1103d68: {[W2 Value[132 x 512]] } -MPI Rank 1: 0x110a668: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 0x113cd98: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 0x113cf88: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 0x113d0e8: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 0x113e238: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 0x11b0ad8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 0x11b0c98: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 0x11b0e58: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 0x11b1378: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 0x11b14c8: {[W0*features Value[512 x *]] } -MPI Rank 1: 0x11b1bb8: {[Prior Value[132]] } -MPI Rank 1: 0x11b9bf8: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 0x1207b78: {[B1 Value[512 x 1]] } -MPI Rank 1: 0x1209698: {[W0 Value[512 x 363]] } -MPI Rank 1: 0x1226428: {[W1 Value[512 x 512]] } -MPI Rank 1: 0x122c838: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 0x122d058: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 0x122d218: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 0x122d3d8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 0x122f988: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 0x122fb48: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 0x122fd08: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: NodeName: InvStdOfFeatures +MPI Rank 1: NodeName: MeanOfFeatures +MPI Rank 1: NodeName: Prior +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: +MPI Rank 1: Precomputing --> Completed. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: Set Max Temp Mem Size For Convolution Nodes to 0 samples. +MPI Rank 1: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:14: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 18:17:14: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 18:17:14: Prior = Mean() +MPI Rank 1: Starting minibatch loop. 
+MPI Rank 1: #PLUS# +MPI Rank 1: Tensor Op: Op 15: 512 x 64 {1,512} op 512 x 1 {1,512} -> 512 x 64 {1,512} +MPI Rank 1: #NLop5# +MPI Rank 1: Tensor Op: Op 5: 512 x 64 {1,512} -> 512 x 64 {1,512} +MPI Rank 1: #PLUSBP# +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: * 640; CrossEntropyWithSoftmax = 4.36628272; EvalErrorPrediction = 0.90937500; TotalTime = 1.2459s; SamplesPerSecond = 513.7 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: * 640; CrossEntropyWithSoftmax = 4.15914991; EvalErrorPrediction = 0.89218750; TotalTime = 1.0856s; SamplesPerSecond = 589.5 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: * 640; CrossEntropyWithSoftmax = 3.99837967; EvalErrorPrediction = 0.86875000; TotalTime = 1.0678s; SamplesPerSecond = 599.4 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: * 640; CrossEntropyWithSoftmax = 3.86616341; EvalErrorPrediction = 0.86250000; TotalTime = 1.0544s; SamplesPerSecond = 607.0 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: * 640; CrossEntropyWithSoftmax = 3.80082643; EvalErrorPrediction = 0.87968750; TotalTime = 0.9092s; SamplesPerSecond = 703.9 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: * 640; CrossEntropyWithSoftmax = 3.73336112; EvalErrorPrediction = 0.87812500; TotalTime = 0.8011s; SamplesPerSecond = 798.9 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: * 640; CrossEntropyWithSoftmax = 3.57119384; EvalErrorPrediction = 0.82031250; TotalTime = 1.0352s; SamplesPerSecond = 618.2 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: * 640; CrossEntropyWithSoftmax = 3.44001005; EvalErrorPrediction = 0.81562500; TotalTime = 0.8800s; SamplesPerSecond = 727.3 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: * 640; CrossEntropyWithSoftmax = 3.36131109; EvalErrorPrediction = 0.77343750; TotalTime = 0.9507s; SamplesPerSecond = 673.2 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: * 640; CrossEntropyWithSoftmax = 3.39817487; EvalErrorPrediction = 0.85000000; TotalTime = 0.9720s; SamplesPerSecond = 658.4 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: * 640; CrossEntropyWithSoftmax = 3.25116276; EvalErrorPrediction = 0.77031250; TotalTime = 0.8238s; SamplesPerSecond = 776.9 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: * 640; CrossEntropyWithSoftmax = 3.35774005; EvalErrorPrediction = 0.79843750; TotalTime = 0.9045s; SamplesPerSecond = 707.5 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: * 640; CrossEntropyWithSoftmax = 3.19791351; EvalErrorPrediction = 0.76406250; TotalTime = 1.1113s; SamplesPerSecond = 575.9 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: * 640; CrossEntropyWithSoftmax = 3.06449990; EvalErrorPrediction = 0.71718750; TotalTime = 0.9321s; SamplesPerSecond = 686.6 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: * 640; CrossEntropyWithSoftmax = 3.05357361; EvalErrorPrediction = 0.74218750; TotalTime = 0.7297s; SamplesPerSecond = 877.1 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: * 640; CrossEntropyWithSoftmax = 3.02144079; EvalErrorPrediction = 0.74531250; TotalTime = 0.6618s; SamplesPerSecond = 967.1 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: * 640; CrossEntropyWithSoftmax = 2.89890004; EvalErrorPrediction = 0.69687500; TotalTime = 0.7469s; SamplesPerSecond = 856.9 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: * 640; CrossEntropyWithSoftmax = 2.74598358; EvalErrorPrediction = 0.68593750; TotalTime = 0.8027s; SamplesPerSecond = 797.4 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: * 640; 
CrossEntropyWithSoftmax = 2.83604141; EvalErrorPrediction = 0.70625000; TotalTime = 0.6692s; SamplesPerSecond = 956.4 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: * 640; CrossEntropyWithSoftmax = 2.62522562; EvalErrorPrediction = 0.64687500; TotalTime = 0.6625s; SamplesPerSecond = 966.0 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: * 640; CrossEntropyWithSoftmax = 2.65507979; EvalErrorPrediction = 0.66562500; TotalTime = 0.7818s; SamplesPerSecond = 818.7 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: * 640; CrossEntropyWithSoftmax = 2.59593989; EvalErrorPrediction = 0.65937500; TotalTime = 0.8652s; SamplesPerSecond = 739.7 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: * 640; CrossEntropyWithSoftmax = 2.51177605; EvalErrorPrediction = 0.62343750; TotalTime = 1.0687s; SamplesPerSecond = 598.9 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: * 640; CrossEntropyWithSoftmax = 2.42438840; EvalErrorPrediction = 0.63281250; TotalTime = 1.0737s; SamplesPerSecond = 596.1 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: * 640; CrossEntropyWithSoftmax = 2.40372959; EvalErrorPrediction = 0.65156250; TotalTime = 1.0723s; SamplesPerSecond = 596.8 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: * 640; CrossEntropyWithSoftmax = 2.48277420; EvalErrorPrediction = 0.63906250; TotalTime = 1.0673s; SamplesPerSecond = 599.6 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: * 640; CrossEntropyWithSoftmax = 2.34181483; EvalErrorPrediction = 0.61718750; TotalTime = 1.0991s; SamplesPerSecond = 582.3 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: * 640; CrossEntropyWithSoftmax = 2.22951559; EvalErrorPrediction = 0.57656250; TotalTime = 1.1095s; SamplesPerSecond = 576.9 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: * 640; CrossEntropyWithSoftmax = 2.32715885; EvalErrorPrediction = 0.62031250; TotalTime = 1.0623s; SamplesPerSecond = 602.5 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: * 640; CrossEntropyWithSoftmax = 2.21143816; EvalErrorPrediction = 0.61406250; TotalTime = 1.0597s; SamplesPerSecond = 604.0 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: * 640; CrossEntropyWithSoftmax = 2.29118500; EvalErrorPrediction = 0.60156250; TotalTime = 1.0513s; SamplesPerSecond = 608.8 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: * 640; CrossEntropyWithSoftmax = 2.19155470; EvalErrorPrediction = 0.56406250; TotalTime = 1.0534s; SamplesPerSecond = 607.5 +MPI Rank 1: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779; EvalErrorPrediction = 0.7277832; learningRatePerSample = 0.015625; EpochTime=30.4183 +MPI Rank 1: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:15: Precomputing --> Completed. +MPI Rank 1: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 1: Actual gradient aggregation time: 0.046047 +MPI Rank 1: Async gradient aggregation wait time: 1.4e-05 +MPI Rank 1: Actual gradient aggregation time: 0.138694 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: * 2304; CrossEntropyWithSoftmax = 2.09514596; EvalErrorPrediction = 0.55989583; TotalTime = 1.2304s; SamplesPerSecond = 1872.5 +MPI Rank 1: Async gradient aggregation wait time: 1.3e-05 +MPI Rank 1: Actual gradient aggregation time: 0.119024 +MPI Rank 1: Async gradient aggregation wait time: 8e-06 +MPI Rank 1: Actual gradient aggregation time: 0.12827 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: * 2560; CrossEntropyWithSoftmax = 2.14762552; EvalErrorPrediction = 0.58242187; TotalTime = 1.3526s; SamplesPerSecond = 1892.7 +MPI Rank 1: Async gradient aggregation wait time: 0.004926 +MPI Rank 1: Actual gradient aggregation time: 0.142038 +MPI Rank 1: Async gradient aggregation wait time: 0.011642 +MPI Rank 1: Actual gradient aggregation time: 0.138231 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: * 2560; CrossEntropyWithSoftmax = 2.19977785; EvalErrorPrediction = 0.58867187; TotalTime = 1.3781s; SamplesPerSecond = 1857.6 +MPI Rank 1: Async gradient aggregation wait time: 0.003444 +MPI Rank 1: Actual gradient aggregation time: 0.140383 +MPI Rank 1: Async gradient aggregation wait time: 7e-06 +MPI Rank 1: Actual gradient aggregation time: 0.14099 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: * 2560; CrossEntropyWithSoftmax = 2.13471172; EvalErrorPrediction = 0.59023437; TotalTime = 1.3807s; SamplesPerSecond = 1854.1 +MPI Rank 1: Async gradient aggregation wait time: 0.00307 +MPI Rank 1: Actual gradient aggregation time: 0.135863 +MPI Rank 1: Async gradient aggregation wait time: 0.006403 +MPI Rank 1: Actual gradient aggregation time: 0.133294 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: * 2560; CrossEntropyWithSoftmax = 2.07369296; EvalErrorPrediction = 0.57382813; TotalTime = 1.3809s; SamplesPerSecond = 1853.8 +MPI Rank 1: Async gradient aggregation wait time: 1.3e-05 +MPI Rank 1: Actual gradient aggregation time: 0.122461 +MPI Rank 1: Async gradient aggregation wait time: 0.009764 +MPI Rank 1: Actual gradient aggregation time: 0.130475 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: * 2560; CrossEntropyWithSoftmax = 2.14944464; EvalErrorPrediction = 0.57578125; TotalTime = 1.3624s; SamplesPerSecond = 1879.0 +MPI Rank 1: Async gradient aggregation wait time: 0.003466 +MPI Rank 1: Actual gradient aggregation time: 0.135504 +MPI Rank 1: Async gradient aggregation wait time: 1.4e-05 +MPI Rank 1: Actual gradient aggregation time: 0.121177 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: * 2560; CrossEntropyWithSoftmax = 2.09921664; EvalErrorPrediction = 0.56484375; TotalTime = 1.3754s; SamplesPerSecond = 1861.3 +MPI Rank 1: Async gradient aggregation wait time: 1.4e-05 +MPI Rank 1: Actual gradient aggregation time: 0.104627 +MPI Rank 1: Async gradient aggregation wait time: 1.3e-05 +MPI Rank 1: Actual gradient aggregation time: 0.08827 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: * 2560; CrossEntropyWithSoftmax = 2.04462189; EvalErrorPrediction = 0.56484375; TotalTime = 1.3409s; SamplesPerSecond = 1909.1 +MPI Rank 1: Async gradient aggregation wait time: 0.034765 +MPI Rank 1: Actual gradient aggregation time: 0.068492 +MPI Rank 1: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.11636713; EvalErrorPrediction = 0.575; learningRatePerSample = 0.001953125; EpochTime=10.91 +MPI Rank 1: Starting 
Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: +MPI Rank 1: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Async gradient aggregation wait time: 1.5e-05 +MPI Rank 1: Actual gradient aggregation time: 0.046922 +MPI Rank 1: Async gradient aggregation wait time: 1.4e-05 +MPI Rank 1: Actual gradient aggregation time: 0.049929 +MPI Rank 1: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 2.00619565; EvalErrorPrediction = 0.55088976; TotalTime = 3.3045s; SamplesPerSecond = 2788.9 +MPI Rank 1: Async gradient aggregation wait time: 1.5e-05 +MPI Rank 1: Actual gradient aggregation time: 0.107407 +MPI Rank 1: Async gradient aggregation wait time: 1.5e-05 +MPI Rank 1: Actual gradient aggregation time: 0.030779 +MPI Rank 1: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.93824509; EvalErrorPrediction = 0.53398437; TotalTime = 3.3284s; SamplesPerSecond = 3076.6 +MPI Rank 1: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.97096281; EvalErrorPrediction = 0.54194336; learningRatePerSample = 9.7656251e-05; EpochTime=6.75424 +MPI Rank 1: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:21: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:21: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 18:17:21: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.1938s; samplesPerSecond = 3302.3 -MPI Rank 1: 05/03/2016 18:17:21: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.1038s; samplesPerSecond = 6163.3 -MPI Rank 1: 05/03/2016 18:17:21: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.1047s; samplesPerSecond = 6114.0 -MPI Rank 1: 05/03/2016 18:17:21: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.1038s; samplesPerSecond = 6165.0 -MPI Rank 1: 05/03/2016 18:17:21: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.1042s; samplesPerSecond = 6141.0 -MPI Rank 1: 05/03/2016 18:17:21: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1036s; samplesPerSecond = 6177.2 -MPI Rank 1: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.1044s; samplesPerSecond = 6128.4 -MPI Rank 1: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.1046s; samplesPerSecond = 6115.9 -MPI Rank 1: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1060s; samplesPerSecond = 6040.0 -MPI Rank 1: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.1059s; samplesPerSecond = 6045.0 -MPI Rank 1: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.1089s; samplesPerSecond = 5877.3 -MPI Rank 1: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.1058s; samplesPerSecond = 6047.9 -MPI Rank 1: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.1058s; samplesPerSecond = 6050.5 -MPI Rank 1: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.1059s; samplesPerSecond = 6041.9 -MPI Rank 1: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.1059s; samplesPerSecond = 6044.5 -MPI Rank 1: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.1058s; samplesPerSecond = 6050.2 -MPI Rank 1: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.1057s; samplesPerSecond = 6057.0 -MPI Rank 1: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.1058s; samplesPerSecond = 6051.8 -MPI Rank 1: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.1057s; samplesPerSecond = 6056.0 -MPI Rank 1: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.1058s; samplesPerSecond = 6046.9 -MPI Rank 1: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.1061s; samplesPerSecond = 6033.1 -MPI Rank 1: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.1081s; samplesPerSecond = 5922.5 -MPI Rank 1: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.1058s; samplesPerSecond = 6047.4 -MPI Rank 1: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.1055s; samplesPerSecond = 6063.8 -MPI Rank 1: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.1063s; samplesPerSecond = 6018.2 -MPI Rank 1: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.1075s; samplesPerSecond = 5954.9 -MPI Rank 1: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.1066s; samplesPerSecond = 6005.7 -MPI Rank 1: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.1070s; samplesPerSecond = 5979.9 -MPI Rank 1: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.1063s; samplesPerSecond = 6022.9 -MPI Rank 1: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.1057s; samplesPerSecond = 6056.9 -MPI Rank 1: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.1057s; samplesPerSecond = 6056.7 -MPI Rank 1: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.1057s; samplesPerSecond = 6057.6 -MPI Rank 1: 05/03/2016 18:17:24: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.51796s -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:33: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), 
BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.256751 -MPI Rank 1: Async gradient aggregation wait time: 0.152524 -MPI Rank 1: Actual gradient aggregation time: 0.017439 -MPI Rank 1: 05/03/2016 18:17:34: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09514596 * 2304; EvalErrorPrediction = 0.55989583 * 2304; time = 0.9111s; samplesPerSecond = 2528.9 -MPI Rank 1: Async gradient aggregation wait time: 0.060633 -MPI Rank 1: Actual gradient aggregation time: 0.023823 -MPI Rank 1: Async gradient aggregation wait time: 0.141508 -MPI Rank 1: Actual gradient aggregation time: 0.030909 -MPI Rank 1: 05/03/2016 18:17:35: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.14762552 * 2560; EvalErrorPrediction = 0.58242187 * 2560; time = 0.7441s; samplesPerSecond = 3440.3 -MPI Rank 1: Async gradient aggregation wait time: 0.032629 -MPI Rank 1: Actual gradient aggregation time: 0.078841 -MPI Rank 1: Async gradient aggregation wait time: 0.086116 -MPI Rank 1: Actual gradient aggregation time: 0.074649 -MPI Rank 1: 05/03/2016 18:17:36: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.19977785 * 2560; EvalErrorPrediction = 0.58867187 * 2560; time = 0.9154s; samplesPerSecond = 2796.7 -MPI Rank 1: Async gradient aggregation wait time: 0.04271 -MPI Rank 1: Actual gradient aggregation time: 0.057857 -MPI Rank 1: Async gradient aggregation wait time: 0.032583 -MPI Rank 1: Actual gradient aggregation time: 0.099574 -MPI Rank 1: 05/03/2016 18:17:37: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.13471172 * 2560; EvalErrorPrediction = 0.59023437 * 2560; time = 0.6551s; samplesPerSecond = 3907.8 -MPI Rank 1: Async gradient aggregation wait time: 0.036529 -MPI Rank 1: Actual gradient aggregation time: 0.055498 -MPI Rank 1: Async gradient aggregation wait time: 0.132228 -MPI Rank 1: Actual gradient aggregation time: 0.013017 -MPI Rank 1: 05/03/2016 18:17:37: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.07369296 * 2560; EvalErrorPrediction = 0.57382813 * 2560; time = 0.8666s; samplesPerSecond = 2954.0 -MPI Rank 1: Async gradient aggregation wait time: 0.021536 -MPI Rank 1: Actual gradient aggregation time: 0.051534 -MPI Rank 1: Async gradient aggregation wait time: 0.343543 -MPI Rank 1: Actual gradient aggregation time: 0.151395 -MPI Rank 1: 05/03/2016 18:17:38: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.14944464 * 2560; EvalErrorPrediction = 0.57578125 * 2560; time = 1.0492s; samplesPerSecond = 2440.0 -MPI Rank 1: Async gradient aggregation wait time: 0.041122 -MPI Rank 1: Actual gradient aggregation time: 0.049106 -MPI Rank 1: Async gradient aggregation wait time: 0.036614 -MPI Rank 1: Actual gradient aggregation time: 0.111577 -MPI Rank 1: 05/03/2016 18:17:39: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.09921664 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.7697s; samplesPerSecond = 3326.0 -MPI Rank 1: Async gradient aggregation wait time: 0.002193 -MPI Rank 1: Actual gradient aggregation time: 0.060973 -MPI Rank 1: Async gradient aggregation wait time: 0.044073 -MPI Rank 1: Actual gradient aggregation time: 0.080688 -MPI Rank 1: 05/03/2016 18:17:40: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.04462189 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.5997s; samplesPerSecond = 4269.0 -MPI Rank 1: Async gradient aggregation 
wait time: 0.088484 -MPI Rank 1: Actual gradient aggregation time: 0.03975 -MPI Rank 1: 05/03/2016 18:17:40: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.11636713 * 20480; EvalErrorPrediction = 0.57500000 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.64205s -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:40: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:40: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.151793 -MPI Rank 1: Actual gradient aggregation time: 0.185264 -MPI Rank 1: Async gradient aggregation wait time: 0.12537 -MPI Rank 1: Actual gradient aggregation time: 0.177347 -MPI Rank 1: 05/03/2016 18:17:42: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.00619565 * 9216; EvalErrorPrediction = 0.55088976 * 9216; time = 1.8249s; samplesPerSecond = 5050.0 -MPI Rank 1: Async gradient aggregation wait time: 0.189029 -MPI Rank 1: Actual gradient aggregation time: 0.215694 -MPI Rank 1: Async gradient aggregation wait time: 0.10713 -MPI Rank 1: Actual gradient aggregation time: 0.243509 -MPI Rank 1: 05/03/2016 18:17:44: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93824509 * 10240; EvalErrorPrediction = 0.53398437 * 10240; time = 2.1309s; samplesPerSecond = 4805.4 -MPI Rank 1: 05/03/2016 18:17:44: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.97096281 * 20480; EvalErrorPrediction = 0.54194336 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.35452s -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:45: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:45: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.07418 -MPI Rank 1: Actual gradient aggregation time: 0.135066 -MPI Rank 1: Async gradient aggregation wait time: 0.306501 -MPI Rank 1: Actual gradient aggregation time: 0.098328 -MPI Rank 1: 05/03/2016 18:17:46: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91072859 * 9216; EvalErrorPrediction = 0.52365451 * 9216; time = 1.5884s; samplesPerSecond = 5802.0 -MPI Rank 1: Async gradient aggregation wait time: 0.281448 -MPI Rank 1: Actual gradient aggregation time: 0.1621 -MPI Rank 1: Async gradient aggregation wait time: 0.084342 -MPI Rank 1: Actual gradient aggregation time: 0.299371 -MPI Rank 1: 05/03/2016 18:17:48: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89799241 * 10240; EvalErrorPrediction = 0.52294922 * 10240; time = 1.9459s; samplesPerSecond = 5262.4 -MPI Rank 1: Async gradient aggregation wait time: 0.035358 -MPI Rank 1: 05/03/2016 18:17:48: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.90474356 * 20480; EvalErrorPrediction = 0.52290039 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=3.68894s -MPI Rank 1: 05/03/2016 18:17:48: CNTKCommandTrainEnd: speechTrain -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:48: Action "train" complete. 
-MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:48: __COMPLETED__ -MPI Rank 2: 05/03/2016 18:17:14: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 18:17:14: Build info: +MPI Rank 1: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Async gradient aggregation wait time: 9e-06 +MPI Rank 1: Actual gradient aggregation time: 0.006172 +MPI Rank 1: Async gradient aggregation wait time: 1.4e-05 +MPI Rank 1: Actual gradient aggregation time: 0.005785 +MPI Rank 1: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 1.91072859; EvalErrorPrediction = 0.52365451; TotalTime = 3.4161s; SamplesPerSecond = 2697.8 +MPI Rank 1: Async gradient aggregation wait time: 1.5e-05 +MPI Rank 1: Actual gradient aggregation time: 0.006201 +MPI Rank 1: Async gradient aggregation wait time: 1.5e-05 +MPI Rank 1: Actual gradient aggregation time: 0.005721 +MPI Rank 1: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.89799241; EvalErrorPrediction = 0.52294922; TotalTime = 3.3852s; SamplesPerSecond = 3024.9 +MPI Rank 1: Async gradient aggregation wait time: 1.3e-05 +MPI Rank 1: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.90474356; EvalErrorPrediction = 0.52290039; learningRatePerSample = 9.7656251e-05; EpochTime=6.8934 +MPI Rank 1: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: __COMPLETED__ +MPI Rank 1: ~MPIWrapper +MPI Rank 2: ------------------------------------------------------------------- +MPI Rank 2: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: Built time: May 3 2016 17:56:15 -MPI Rank 2: 05/03/2016 18:17:14: Last modified date: Tue May 3 11:36:22 2016 -MPI Rank 2: 05/03/2016 18:17:14: Build type: release -MPI Rank 2: 05/03/2016 18:17:14: Build target: GPU -MPI Rank 2: 05/03/2016 18:17:14: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 18:17:14: Math lib: acml -MPI Rank 2: 05/03/2016 18:17:14: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 2: 05/03/2016 18:17:14: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 2: 05/03/2016 18:17:14: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 2: 05/03/2016 18:17:14: Build Branch: HEAD -MPI Rank 2: 05/03/2016 18:17:14: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 2: 05/03/2016 18:17:14: Built by philly on 18750d26eb32 -MPI Rank 2: 05/03/2016 18:17:14: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 2: 05/03/2016 18:17:14: ------------------------------------------------------------------- +MPI Rank 2: Built time: Jan 6 2016 19:01:02 +MPI Rank 2: Last modified date: Tue Jan 5 10:37:19 2016 +MPI Rank 2: Build type: debug +MPI Rank 2: Math lib: acml +MPI Rank 2: CUDA_PATH: /usr/local/cuda-7.0 +MPI Rank 2: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 2: Build Branch: master +MPI Rank 2: Build SHA1: f88156c7f48e6418e0e5e2998e159c54aaca3c1d +MPI Rank 2: ------------------------------------------------------------------- +MPI Rank 2: running on localhost at 2016/01/06 23:26:49 +MPI Rank 2: command line: +MPI Rank 2: /home/mluser/src/cplx_master/build/debug/bin/cntk configFile=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../cntk.config currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu 
DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. DeviceId=-1 numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: Running on localhost at 2016/05/03 18:17:14 -MPI Rank 2: 05/03/2016 18:17:14: Command line: -MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:17:14: precision = "float" +MPI Rank 2: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1356,7 +1784,7 @@ MPI Rank 2: CE = if trainingCriterion == 'CE' MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 2: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 2: logPrior = LogPrior(labels) MPI Rank 2: // TODO: how to add a tag to an infix operation? 
@@ -1407,34 +1835,30 @@ MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu +MPI Rank 2: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. MPI Rank 2: DeviceId=-1 -MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] -MPI Rank 2: numCPUThreads=8 +MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:17:14: precision = "float" +MPI Rank 2: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = -1 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1466,7 +1890,7 @@ MPI Rank 2: CE = if trainingCriterion == 'CE' MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) 
MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 2: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 2: logPrior = LogPrior(labels) MPI Rank 2: // TODO: how to add a tag to an infix operation? @@ -1510,47 +1934,42 @@ MPI Rank 2: type = "real" MPI Rank 2: scpFile = "glob_0000.scp" MPI Rank 2: ] MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 2: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 2: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu +MPI Rank 2: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
MPI Rank 2: DeviceId=-1 -MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] -MPI Rank 2: numCPUThreads=8 +MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 2: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: configparameters: cntk.cntk:deviceId=-1 -MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu -MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ +MPI Rank 2: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: configparameters: cntk.config:command=speechTrain +MPI Rank 2: configparameters: cntk.config:ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
+MPI Rank 2: configparameters: cntk.config:currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: configparameters: cntk.config:DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: configparameters: cntk.config:deviceId=-1 +MPI Rank 2: configparameters: cntk.config:numCPUThreads=2 +MPI Rank 2: configparameters: cntk.config:parallelTrain=true +MPI Rank 2: configparameters: cntk.config:precision=double +MPI Rank 2: configparameters: cntk.config:RunDir=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu +MPI Rank 2: configparameters: cntk.config:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1582,7 +2001,7 @@ MPI Rank 2: CE = if trainingCriterion == 'CE' MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 2: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 2: logPrior = LogPrior(labels) MPI Rank 2: // TODO: how to add a tag to an infix operation? @@ -1626,261 +2045,537 @@ MPI Rank 2: type = "real" MPI Rank 2: scpFile = "glob_0000.scp" MPI Rank 2: ] MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 2: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 2: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 18:17:14: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 18:17:14: Commands: speechTrain -MPI Rank 2: 05/03/2016 18:17:14: Precision = "double" -MPI Rank 2: 05/03/2016 18:17:14: Using 8 CPU threads. 
-MPI Rank 2: 05/03/2016 18:17:14: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn
-MPI Rank 2: 05/03/2016 18:17:14: CNTKCommandTrainInfo: speechTrain : 4
-MPI Rank 2: 05/03/2016 18:17:14: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:14: ##############################################################################
-MPI Rank 2: 05/03/2016 18:17:14: # #
-MPI Rank 2: 05/03/2016 18:17:14: # Action "train" #
-MPI Rank 2: 05/03/2016 18:17:14: # #
-MPI Rank 2: 05/03/2016 18:17:14: ##############################################################################
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:14: CNTKCommandTrainBegin: speechTrain
+MPI Rank 2: configparameters: cntk.config:stderr=/tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/stderr
+MPI Rank 2: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+MPI Rank 2: command: speechTrain
+MPI Rank 2: precision = double
+MPI Rank 2: Using 2 CPU threads
+MPI Rank 2: CNTKModelPath: /tmp/cntk-test-20160106232647.724209/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_cpu/models/cntkSpeech.dnn
+MPI Rank 2: CNTKCommandTrainInfo: speechTrain : 4
+MPI Rank 2: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4
+MPI Rank 2: CNTKCommandTrainBegin: speechTrain
 MPI Rank 2: SimpleNetworkBuilder Using CPU
-MPI Rank 2: Reading script file glob_0000.scp ... 948 entries
-MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
-MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms
-MPI Rank 2: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list
-MPI Rank 2: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries
-MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences
-MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:14: Creating virgin network.
+MPI Rank 2: reading script file glob_0000.scp ... 948 entries
+MPI Rank 2: total 132 state names in state list /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list
+MPI Rank 2: htkmlfreader: reading MLF file /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries
+MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
+MPI Rank 2: label set 0: 129 classes
+MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
 MPI Rank 2: 
 MPI Rank 2: Post-processing network...
 MPI Rank 2: 
 MPI Rank 2: 7 roots:
-MPI Rank 2: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-MPI Rank 2: EvalErrorPrediction = ErrorPrediction()
-MPI Rank 2: InvStdOfFeatures = InvStdDev()
-MPI Rank 2: MeanOfFeatures = Mean()
-MPI Rank 2: PosteriorProb = Softmax()
-MPI Rank 2: Prior = Mean()
-MPI Rank 2: ScaledLogLikelihood = Minus()
-MPI Rank 2: 
-MPI Rank 2: Validating network. 25 nodes to process in pass 1.
-MPI Rank 2: -MPI Rank 2: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 2: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 2: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 2: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 2: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 2: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 2: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 2: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 2: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 2: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 2: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 2: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 2: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 2: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 2: Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 2: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 2: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 2: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 2: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 2: -MPI Rank 2: Validating network. 17 nodes to process in pass 2. +MPI Rank 2: MeanOfFeatures = Mean +MPI Rank 2: InvStdOfFeatures = InvStdDev +MPI Rank 2: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: Prior = Mean +MPI Rank 2: ScaledLogLikelihood = Minus +MPI Rank 2: PosteriorProb = Softmax +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for MeanOfFeatures Mean operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for InvStdOfFeatures InvStdDev operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for CrossEntropyWithSoftmax CrossEntropyWithSoftmax operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for EvalErrorPrediction ErrorPrediction operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for Prior Mean operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for ScaledLogLikelihood Minus operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for PosteriorProb Softmax operation MPI Rank 2: MPI Rank 2: -MPI Rank 2: Validating network, final pass. +MPI Rank 2: Validating for node MeanOfFeatures. 2 nodes to process in pass 1. 
+MPI Rank 2: +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node MeanOfFeatures. 1 nodes to process in pass 2. +MPI Rank 2: +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node MeanOfFeatures, final verification. +MPI Rank 2: +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: 1 out of 2 nodes do not share the minibatch layout with the input data. MPI Rank 2: MPI Rank 2: +MPI Rank 2: Validating for node InvStdOfFeatures. 2 nodes to process in pass 1. MPI Rank 2: -MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input data. +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node InvStdOfFeatures. 1 nodes to process in pass 2. +MPI Rank 2: +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node InvStdOfFeatures, final verification. +MPI Rank 2: +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 2: +MPI Rank 2: +MPI Rank 2: Validating for node CrossEntropyWithSoftmax. 20 nodes to process in pass 1. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node CrossEntropyWithSoftmax. 10 nodes to process in pass 2. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node CrossEntropyWithSoftmax, final verification. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 2: +MPI Rank 2: +MPI Rank 2: Validating for node EvalErrorPrediction. 20 nodes to process in pass 1. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node EvalErrorPrediction. 9 nodes to process in pass 2. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node EvalErrorPrediction, final verification. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 2: +MPI Rank 2: +MPI Rank 2: Validating for node Prior. 2 nodes to process in pass 1. +MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node Prior. 1 nodes to process in pass 2. +MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node Prior, final verification. +MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: +MPI Rank 2: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 2: +MPI Rank 2: +MPI Rank 2: Validating for node ScaledLogLikelihood. 22 nodes to process in pass 1. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: Validating for node ScaledLogLikelihood. 10 nodes to process in pass 2. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: Validating for node ScaledLogLikelihood, final verification. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: 10 out of 22 nodes do not share the minibatch layout with the input data. +MPI Rank 2: +MPI Rank 2: +MPI Rank 2: Validating for node PosteriorProb. 19 nodes to process in pass 1. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: Validating for node PosteriorProb. 9 nodes to process in pass 2. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: Validating for node PosteriorProb, final verification. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: 8 out of 19 nodes do not share the minibatch layout with the input data. MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: Created model with 25 nodes on CPU. +MPI Rank 2: SGD using CPU. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: Training criterion node(s): -MPI Rank 2: 05/03/2016 18:17:14: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: Training criterion node(s): +MPI Rank 2: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: Evaluation criterion node(s): -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: Evaluation criterion node(s): +MPI Rank 2: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. MPI Rank 2: -MPI Rank 2: Memory Sharing Structure: +MPI Rank 2: Precomputing --> 3 PreCompute nodes found. 
MPI Rank 2: -MPI Rank 2: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0x1c4f1d8: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 0x1c4f3b8: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 0x1c4fb68: {[W2 Value[132 x 512]] } -MPI Rank 2: 0x1c5c088: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 0x1c63438: {[W0*features Value[512 x *]] } -MPI Rank 2: 0x1c635f8: {[LogOfPrior Value[132]] } -MPI Rank 2: 0x1c6f4d8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 0x1c6f698: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 0x1ca13b8: {[features Value[363 x *]] } -MPI Rank 2: 0x1ceeb78: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 0x1ceed38: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 0x1ceeef8: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 0x1cfff18: {[labels Value[132 x *]] } -MPI Rank 2: 0x1d1f1b8: {[B0 Value[512 x 1]] } -MPI Rank 2: 0x1d37f48: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 0x1d457a8: {[W0 Value[512 x 363]] } -MPI Rank 2: 0x1d58e98: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 0x1d590a8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 0x1d59268: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 0x1d5cde8: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 0x1d5cfa8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 0x1d5d168: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 0x1d66268: {[B1 Value[512 x 1]] } -MPI Rank 2: 0x1d6dc18: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 0x1d6ddd8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 0x1d6e3f8: {[W1 Value[512 x 512]] } -MPI Rank 2: 0x1d8ae38: {[B2 Value[132 x 1]] } -MPI Rank 2: 0x1d8e0c8: {[Prior Value[132]] } +MPI Rank 2: NodeName: InvStdOfFeatures +MPI Rank 2: NodeName: MeanOfFeatures +MPI Rank 2: NodeName: Prior +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: +MPI Rank 2: Precomputing --> Completed. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: Set Max Temp Mem Size For Convolution Nodes to 0 samples. +MPI Rank 2: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:14: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 18:17:14: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 18:17:14: Prior = Mean() +MPI Rank 2: Starting minibatch loop. 
+MPI Rank 2: #PLUS# +MPI Rank 2: Tensor Op: Op 15: 512 x 64 {1,512} op 512 x 1 {1,512} -> 512 x 64 {1,512} +MPI Rank 2: #NLop5# +MPI Rank 2: Tensor Op: Op 5: 512 x 64 {1,512} -> 512 x 64 {1,512} +MPI Rank 2: #PLUSBP# +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: * 640; CrossEntropyWithSoftmax = 4.36628272; EvalErrorPrediction = 0.90937500; TotalTime = 0.7096s; SamplesPerSecond = 901.9 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: * 640; CrossEntropyWithSoftmax = 4.15914991; EvalErrorPrediction = 0.89218750; TotalTime = 0.6952s; SamplesPerSecond = 920.6 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: * 640; CrossEntropyWithSoftmax = 3.99837967; EvalErrorPrediction = 0.86875000; TotalTime = 0.8293s; SamplesPerSecond = 771.7 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: * 640; CrossEntropyWithSoftmax = 3.86616341; EvalErrorPrediction = 0.86250000; TotalTime = 0.8270s; SamplesPerSecond = 773.9 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: * 640; CrossEntropyWithSoftmax = 3.80082643; EvalErrorPrediction = 0.87968750; TotalTime = 0.9680s; SamplesPerSecond = 661.2 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: * 640; CrossEntropyWithSoftmax = 3.73336112; EvalErrorPrediction = 0.87812500; TotalTime = 1.1485s; SamplesPerSecond = 557.2 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: * 640; CrossEntropyWithSoftmax = 3.57119384; EvalErrorPrediction = 0.82031250; TotalTime = 0.8633s; SamplesPerSecond = 741.4 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: * 640; CrossEntropyWithSoftmax = 3.44001005; EvalErrorPrediction = 0.81562500; TotalTime = 0.8252s; SamplesPerSecond = 775.6 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: * 640; CrossEntropyWithSoftmax = 3.36131109; EvalErrorPrediction = 0.77343750; TotalTime = 0.7957s; SamplesPerSecond = 804.3 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: * 640; CrossEntropyWithSoftmax = 3.39817487; EvalErrorPrediction = 0.85000000; TotalTime = 1.1151s; SamplesPerSecond = 573.9 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: * 640; CrossEntropyWithSoftmax = 3.25116276; EvalErrorPrediction = 0.77031250; TotalTime = 1.0905s; SamplesPerSecond = 586.9 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: * 640; CrossEntropyWithSoftmax = 3.35774005; EvalErrorPrediction = 0.79843750; TotalTime = 1.1102s; SamplesPerSecond = 576.5 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: * 640; CrossEntropyWithSoftmax = 3.19791351; EvalErrorPrediction = 0.76406250; TotalTime = 1.1037s; SamplesPerSecond = 579.9 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: * 640; CrossEntropyWithSoftmax = 3.06449990; EvalErrorPrediction = 0.71718750; TotalTime = 0.8253s; SamplesPerSecond = 775.4 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: * 640; CrossEntropyWithSoftmax = 3.05357361; EvalErrorPrediction = 0.74218750; TotalTime = 0.8097s; SamplesPerSecond = 790.4 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: * 640; CrossEntropyWithSoftmax = 3.02144079; EvalErrorPrediction = 0.74531250; TotalTime = 0.9776s; SamplesPerSecond = 654.7 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: * 640; CrossEntropyWithSoftmax = 2.89890004; EvalErrorPrediction = 0.69687500; TotalTime = 1.0392s; SamplesPerSecond = 615.9 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: * 640; CrossEntropyWithSoftmax = 2.74598358; EvalErrorPrediction = 0.68593750; TotalTime = 0.8033s; SamplesPerSecond = 796.7 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: * 640; 
CrossEntropyWithSoftmax = 2.83604141; EvalErrorPrediction = 0.70625000; TotalTime = 1.0417s; SamplesPerSecond = 614.4 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: * 640; CrossEntropyWithSoftmax = 2.62522562; EvalErrorPrediction = 0.64687500; TotalTime = 1.1086s; SamplesPerSecond = 577.3 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: * 640; CrossEntropyWithSoftmax = 2.65507979; EvalErrorPrediction = 0.66562500; TotalTime = 0.9366s; SamplesPerSecond = 683.3 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: * 640; CrossEntropyWithSoftmax = 2.59593989; EvalErrorPrediction = 0.65937500; TotalTime = 1.1645s; SamplesPerSecond = 549.6 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: * 640; CrossEntropyWithSoftmax = 2.51177605; EvalErrorPrediction = 0.62343750; TotalTime = 1.1295s; SamplesPerSecond = 566.6 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: * 640; CrossEntropyWithSoftmax = 2.42438840; EvalErrorPrediction = 0.63281250; TotalTime = 1.1017s; SamplesPerSecond = 580.9 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: * 640; CrossEntropyWithSoftmax = 2.40372959; EvalErrorPrediction = 0.65156250; TotalTime = 1.1070s; SamplesPerSecond = 578.2 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: * 640; CrossEntropyWithSoftmax = 2.48277420; EvalErrorPrediction = 0.63906250; TotalTime = 0.8212s; SamplesPerSecond = 779.3 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: * 640; CrossEntropyWithSoftmax = 2.34181483; EvalErrorPrediction = 0.61718750; TotalTime = 0.8427s; SamplesPerSecond = 759.4 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: * 640; CrossEntropyWithSoftmax = 2.22951559; EvalErrorPrediction = 0.57656250; TotalTime = 0.9096s; SamplesPerSecond = 703.6 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: * 640; CrossEntropyWithSoftmax = 2.32715885; EvalErrorPrediction = 0.62031250; TotalTime = 1.0912s; SamplesPerSecond = 586.5 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: * 640; CrossEntropyWithSoftmax = 2.21143816; EvalErrorPrediction = 0.61406250; TotalTime = 1.1533s; SamplesPerSecond = 554.9 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: * 640; CrossEntropyWithSoftmax = 2.29118500; EvalErrorPrediction = 0.60156250; TotalTime = 1.1513s; SamplesPerSecond = 555.9 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: * 640; CrossEntropyWithSoftmax = 2.19155470; EvalErrorPrediction = 0.56406250; TotalTime = 1.1538s; SamplesPerSecond = 554.7 +MPI Rank 2: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779; EvalErrorPrediction = 0.7277832; learningRatePerSample = 0.015625; EpochTime=31.2545 +MPI Rank 2: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:21: Precomputing --> Completed. +MPI Rank 2: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 2: Actual gradient aggregation time: 0.060522 +MPI Rank 2: Async gradient aggregation wait time: 0.031623 +MPI Rank 2: Actual gradient aggregation time: 0.14453 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: * 2304; CrossEntropyWithSoftmax = 2.09514596; EvalErrorPrediction = 0.55989583; TotalTime = 1.2174s; SamplesPerSecond = 1892.5 +MPI Rank 2: Async gradient aggregation wait time: 0.018795 +MPI Rank 2: Actual gradient aggregation time: 0.128166 +MPI Rank 2: Async gradient aggregation wait time: 0.022281 +MPI Rank 2: Actual gradient aggregation time: 0.132534 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: * 2560; CrossEntropyWithSoftmax = 2.14762552; EvalErrorPrediction = 0.58242187; TotalTime = 1.3620s; SamplesPerSecond = 1879.6 +MPI Rank 2: Async gradient aggregation wait time: 0.032316 +MPI Rank 2: Actual gradient aggregation time: 0.138782 +MPI Rank 2: Async gradient aggregation wait time: 0.038093 +MPI Rank 2: Actual gradient aggregation time: 0.129845 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: * 2560; CrossEntropyWithSoftmax = 2.19977785; EvalErrorPrediction = 0.58867187; TotalTime = 1.3797s; SamplesPerSecond = 1855.5 +MPI Rank 2: Async gradient aggregation wait time: 0.032278 +MPI Rank 2: Actual gradient aggregation time: 0.140392 +MPI Rank 2: Async gradient aggregation wait time: 0.028146 +MPI Rank 2: Actual gradient aggregation time: 0.142341 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: * 2560; CrossEntropyWithSoftmax = 2.13471172; EvalErrorPrediction = 0.59023437; TotalTime = 1.3790s; SamplesPerSecond = 1856.5 +MPI Rank 2: Async gradient aggregation wait time: 0.035036 +MPI Rank 2: Actual gradient aggregation time: 0.135871 +MPI Rank 2: Async gradient aggregation wait time: 0.027692 +MPI Rank 2: Actual gradient aggregation time: 0.133302 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: * 2560; CrossEntropyWithSoftmax = 2.07369296; EvalErrorPrediction = 0.57382813; TotalTime = 1.3808s; SamplesPerSecond = 1854.0 +MPI Rank 2: Async gradient aggregation wait time: 0.017662 +MPI Rank 2: Actual gradient aggregation time: 0.131173 +MPI Rank 2: Async gradient aggregation wait time: 0.031357 +MPI Rank 2: Actual gradient aggregation time: 0.127953 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: * 2560; CrossEntropyWithSoftmax = 2.14944464; EvalErrorPrediction = 0.57578125; TotalTime = 1.3611s; SamplesPerSecond = 1880.8 +MPI Rank 2: Async gradient aggregation wait time: 0.021014 +MPI Rank 2: Actual gradient aggregation time: 0.135512 +MPI Rank 2: Async gradient aggregation wait time: 0.01649 +MPI Rank 2: Actual gradient aggregation time: 0.137525 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: * 2560; CrossEntropyWithSoftmax = 2.09921664; EvalErrorPrediction = 0.56484375; TotalTime = 1.3517s; SamplesPerSecond = 1893.8 +MPI Rank 2: Async gradient aggregation wait time: 0.023689 +MPI Rank 2: Actual gradient aggregation time: 0.133718 +MPI Rank 2: Async gradient aggregation wait time: 0.025159 +MPI Rank 2: Actual gradient aggregation time: 0.138051 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: * 2560; CrossEntropyWithSoftmax = 2.04462189; EvalErrorPrediction = 0.56484375; TotalTime = 1.3358s; SamplesPerSecond = 1916.5 +MPI Rank 2: Async gradient aggregation wait time: 0.067309 +MPI Rank 2: Actual gradient aggregation time: 0.065304 +MPI Rank 2: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.11636713; EvalErrorPrediction = 0.575; learningRatePerSample = 0.001953125; EpochTime=10.91 +MPI Rank 
2: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: +MPI Rank 2: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Async gradient aggregation wait time: 0.079988 +MPI Rank 2: Actual gradient aggregation time: 0.334992 +MPI Rank 2: Async gradient aggregation wait time: 0.096806 +MPI Rank 2: Actual gradient aggregation time: 0.33545 +MPI Rank 2: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 2.00619565; EvalErrorPrediction = 0.55088976; TotalTime = 3.0141s; SamplesPerSecond = 3057.6 +MPI Rank 2: Async gradient aggregation wait time: 0.075682 +MPI Rank 2: Actual gradient aggregation time: 0.336393 +MPI Rank 2: Async gradient aggregation wait time: 0.062102 +MPI Rank 2: Actual gradient aggregation time: 0.315776 +MPI Rank 2: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.93824509; EvalErrorPrediction = 0.53398437; TotalTime = 3.3451s; SamplesPerSecond = 3061.2 +MPI Rank 2: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.97096281; EvalErrorPrediction = 0.54194336; learningRatePerSample = 9.7656251e-05; EpochTime=6.75423 +MPI Rank 2: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:21: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:21: Starting minibatch loop. 
-MPI Rank 2: 05/03/2016 18:17:21: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.36628272 * 640; EvalErrorPrediction = 0.90937500 * 640; time = 0.2912s; samplesPerSecond = 2197.9 -MPI Rank 2: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15914991 * 640; EvalErrorPrediction = 0.89218750 * 640; time = 0.4430s; samplesPerSecond = 1444.6 -MPI Rank 2: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99837967 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.2995s; samplesPerSecond = 2136.6 -MPI Rank 2: 05/03/2016 18:17:22: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86616341 * 640; EvalErrorPrediction = 0.86250000 * 640; time = 0.3414s; samplesPerSecond = 1874.5 -MPI Rank 2: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80082643 * 640; EvalErrorPrediction = 0.87968750 * 640; time = 0.4069s; samplesPerSecond = 1573.0 -MPI Rank 2: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73336112 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.3578s; samplesPerSecond = 1788.9 -MPI Rank 2: 05/03/2016 18:17:23: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57119384 * 640; EvalErrorPrediction = 0.82031250 * 640; time = 0.3430s; samplesPerSecond = 1865.8 -MPI Rank 2: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.44001005 * 640; EvalErrorPrediction = 0.81562500 * 640; time = 0.5425s; samplesPerSecond = 1179.8 -MPI Rank 2: 05/03/2016 18:17:24: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36131109 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.3156s; samplesPerSecond = 2027.7 -MPI Rank 2: 05/03/2016 18:17:25: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39817487 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3897s; samplesPerSecond = 1642.4 -MPI Rank 2: 05/03/2016 18:17:25: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25116276 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.4182s; samplesPerSecond = 1530.5 -MPI Rank 2: 05/03/2016 18:17:25: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35774005 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.3946s; samplesPerSecond = 1622.0 -MPI Rank 2: 05/03/2016 18:17:26: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19791351 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.5333s; samplesPerSecond = 1200.0 -MPI Rank 2: 05/03/2016 18:17:26: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06449990 * 640; EvalErrorPrediction = 0.71718750 * 640; time = 0.3297s; samplesPerSecond = 1941.2 -MPI Rank 2: 05/03/2016 18:17:27: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05357361 * 640; EvalErrorPrediction = 0.74218750 * 640; time = 0.3544s; samplesPerSecond = 1806.0 -MPI Rank 2: 05/03/2016 18:17:27: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02144079 * 640; EvalErrorPrediction = 0.74531250 * 640; time = 0.3695s; samplesPerSecond = 1732.2 -MPI Rank 2: 05/03/2016 18:17:27: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89890004 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.3098s; samplesPerSecond = 2065.9 -MPI Rank 2: 05/03/2016 18:17:28: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.74598358 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3440s; samplesPerSecond = 1860.4
-MPI Rank 2: 05/03/2016 18:17:28: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83604141 * 640; EvalErrorPrediction = 0.70625000 * 640; time = 0.5601s; samplesPerSecond = 1142.6
-MPI Rank 2: 05/03/2016 18:17:28: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62522562 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.3037s; samplesPerSecond = 2107.6
-MPI Rank 2: 05/03/2016 18:17:29: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.65507979 * 640; EvalErrorPrediction = 0.66562500 * 640; time = 0.3126s; samplesPerSecond = 2047.5
-MPI Rank 2: 05/03/2016 18:17:29: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.59593989 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3432s; samplesPerSecond = 1864.6
-MPI Rank 2: 05/03/2016 18:17:29: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51177605 * 640; EvalErrorPrediction = 0.62343750 * 640; time = 0.3358s; samplesPerSecond = 1905.6
-MPI Rank 2: 05/03/2016 18:17:30: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42438840 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.3475s; samplesPerSecond = 1841.7
-MPI Rank 2: 05/03/2016 18:17:30: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40372959 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.5180s; samplesPerSecond = 1235.5
-MPI Rank 2: 05/03/2016 18:17:31: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48277420 * 640; EvalErrorPrediction = 0.63906250 * 640; time = 0.3048s; samplesPerSecond = 2099.9
-MPI Rank 2: 05/03/2016 18:17:31: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34181483 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.3391s; samplesPerSecond = 1887.3
-MPI Rank 2: 05/03/2016 18:17:31: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.22951559 * 640; EvalErrorPrediction = 0.57656250 * 640; time = 0.4149s; samplesPerSecond = 1542.6
-MPI Rank 2: 05/03/2016 18:17:32: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.32715885 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.3393s; samplesPerSecond = 1886.4
-MPI Rank 2: 05/03/2016 18:17:32: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21143816 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3185s; samplesPerSecond = 2009.3
-MPI Rank 2: 05/03/2016 18:17:33: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29118500 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.4962s; samplesPerSecond = 1289.8
-MPI Rank 2: 05/03/2016 18:17:33: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.19155470 * 640; EvalErrorPrediction = 0.56406250 * 640; time = 0.3142s; samplesPerSecond = 2036.7
-MPI Rank 2: 05/03/2016 18:17:33: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.01292779 * 20480; EvalErrorPrediction = 0.72778320 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=12.1027s
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:33: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:33: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
-MPI Rank 2: Actual gradient aggregation time: 0.175128
-MPI Rank 2: Async gradient aggregation wait time: 0.117374
-MPI Rank 2: Actual gradient aggregation time: 0.002975
-MPI Rank 2: 05/03/2016 18:17:34: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09514596 * 2304; EvalErrorPrediction = 0.55989583 * 2304; time = 0.9264s; samplesPerSecond = 2487.2
-MPI Rank 2: Async gradient aggregation wait time: 7e-06
-MPI Rank 2: Actual gradient aggregation time: 0.00293
-MPI Rank 2: Async gradient aggregation wait time: 1e-05
-MPI Rank 2: Actual gradient aggregation time: 0.002881
-MPI Rank 2: 05/03/2016 18:17:35: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.14762552 * 2560; EvalErrorPrediction = 0.58242187 * 2560; time = 0.7463s; samplesPerSecond = 3430.1
-MPI Rank 2: Async gradient aggregation wait time: 1.2e-05
-MPI Rank 2: Actual gradient aggregation time: 0.064325
-MPI Rank 2: Async gradient aggregation wait time: 0.072604
-MPI Rank 2: Actual gradient aggregation time: 0.074621
-MPI Rank 2: 05/03/2016 18:17:36: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.19977785 * 2560; EvalErrorPrediction = 0.58867187 * 2560; time = 0.8973s; samplesPerSecond = 2853.0
-MPI Rank 2: Async gradient aggregation wait time: 0.030852
-MPI Rank 2: Actual gradient aggregation time: 0.044373
-MPI Rank 2: Async gradient aggregation wait time: 1e-05
-MPI Rank 2: Actual gradient aggregation time: 0.080097
-MPI Rank 2: 05/03/2016 18:17:37: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.13471172 * 2560; EvalErrorPrediction = 0.59023437 * 2560; time = 0.6665s; samplesPerSecond = 3840.9
-MPI Rank 2: Async gradient aggregation wait time: 8e-06
-MPI Rank 2: Actual gradient aggregation time: 0.042524
-MPI Rank 2: Async gradient aggregation wait time: 0.085588
-MPI Rank 2: Actual gradient aggregation time: 0.013021
-MPI Rank 2: 05/03/2016 18:17:37: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.07369296 * 2560; EvalErrorPrediction = 0.57382813 * 2560; time = 0.8632s; samplesPerSecond = 2965.7
-MPI Rank 2: Async gradient aggregation wait time: 1.2e-05
-MPI Rank 2: Actual gradient aggregation time: 0.043282
-MPI Rank 2: Async gradient aggregation wait time: 0.285633
-MPI Rank 2: Actual gradient aggregation time: 0.264442
-MPI Rank 2: 05/03/2016 18:17:38: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.14944464 * 2560; EvalErrorPrediction = 0.57578125 * 2560; time = 1.0412s; samplesPerSecond = 2458.6
-MPI Rank 2: Async gradient aggregation wait time: 0.023156
-MPI Rank 2: Actual gradient aggregation time: 0.035269
-MPI Rank 2: Async gradient aggregation wait time: 0.006022
-MPI Rank 2: Actual gradient aggregation time: 0.103537
-MPI Rank 2: 05/03/2016 18:17:39: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.09921664 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.7819s; samplesPerSecond = 3274.2
-MPI Rank 2: Async gradient aggregation wait time: 1.1e-05
-MPI Rank 2: Actual gradient aggregation time: 0.026742
-MPI Rank 2: Async gradient aggregation wait time: 0.013842
-MPI Rank 2: Actual gradient aggregation time: 0.059232
-MPI Rank 2: 05/03/2016 18:17:40: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.04462189 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.6050s; samplesPerSecond = 4231.1
-MPI Rank 2: Async gradient aggregation wait time: 0.086932
-MPI Rank 2: Actual gradient aggregation time: 0.005437
-MPI Rank 2: 05/03/2016 18:17:40: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.11636713 * 20480; EvalErrorPrediction = 0.57500000 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.64211s
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:40: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:40: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
-MPI Rank 2: Async gradient aggregation wait time: 1.2e-05
-MPI Rank 2: Actual gradient aggregation time: 0.147008
-MPI Rank 2: Async gradient aggregation wait time: 1.2e-05
-MPI Rank 2: Actual gradient aggregation time: 0.130967
-MPI Rank 2: 05/03/2016 18:17:42: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.00619565 * 9216; EvalErrorPrediction = 0.55088976 * 9216; time = 1.8697s; samplesPerSecond = 4929.2
-MPI Rank 2: Async gradient aggregation wait time: 0.124091
-MPI Rank 2: Actual gradient aggregation time: 0.211671
-MPI Rank 2: Async gradient aggregation wait time: 0.00589
-MPI Rank 2: Actual gradient aggregation time: 0.248853
-MPI Rank 2: 05/03/2016 18:17:44: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93824509 * 10240; EvalErrorPrediction = 0.53398437 * 10240; time = 2.0930s; samplesPerSecond = 4892.5
-MPI Rank 2: 05/03/2016 18:17:44: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.97096281 * 20480; EvalErrorPrediction = 0.54194336 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=4.35176s
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:45: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:45: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
-MPI Rank 2: Async gradient aggregation wait time: 1.1e-05
-MPI Rank 2: Actual gradient aggregation time: 0.002676
-MPI Rank 2: Async gradient aggregation wait time: 1.1e-05
-MPI Rank 2: Actual gradient aggregation time: 0.003152
-MPI Rank 2: 05/03/2016 18:17:46: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91072859 * 9216; EvalErrorPrediction = 0.52365451 * 9216; time = 1.6066s; samplesPerSecond = 5736.2
-MPI Rank 2: Async gradient aggregation wait time: 0.031845
-MPI Rank 2: Actual gradient aggregation time: 0.162115
-MPI Rank 2: Async gradient aggregation wait time: 1e-05
-MPI Rank 2: Actual gradient aggregation time: 0.00666
-MPI Rank 2: 05/03/2016 18:17:48: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89799241 * 10240; EvalErrorPrediction = 0.52294922 * 10240; time = 2.0239s; samplesPerSecond = 5059.6
-MPI Rank 2: Async gradient aggregation wait time: 0.009178
-MPI Rank 2: 05/03/2016 18:17:48: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.90474356 * 20480; EvalErrorPrediction = 0.52290039 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=3.68899s
-MPI Rank 2: 05/03/2016 18:17:48: CNTKCommandTrainEnd: speechTrain
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:48: Action "train" complete.
-MPI Rank 2: 
-MPI Rank 2: 05/03/2016 18:17:48: __COMPLETED__
\ No newline at end of file
+MPI Rank 2: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: Async gradient aggregation wait time: 1.5e-05
+MPI Rank 2: Actual gradient aggregation time: 0.242073
+MPI Rank 2: Async gradient aggregation wait time: 0.084231
+MPI Rank 2: Actual gradient aggregation time: 0.328716
+MPI Rank 2: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 1.91072859; EvalErrorPrediction = 0.52365451; TotalTime = 3.0890s; SamplesPerSecond = 2983.5
+MPI Rank 2: Async gradient aggregation wait time: 0.098743
+MPI Rank 2: Actual gradient aggregation time: 0.358852
+MPI Rank 2: Async gradient aggregation wait time: 0.07654
+MPI Rank 2: Actual gradient aggregation time: 0.332335
+MPI Rank 2: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.89799241; EvalErrorPrediction = 0.52294922; TotalTime = 3.4555s; SamplesPerSecond = 2963.4
+MPI Rank 2: Async gradient aggregation wait time: 0.004703
+MPI Rank 2: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.90474356; EvalErrorPrediction = 0.52290039; learningRatePerSample = 9.7656251e-05; EpochTime=6.84798
+MPI Rank 2: CNTKCommandTrainEnd: speechTrain
+MPI Rank 2: __COMPLETED__
+MPI Rank 2: ~MPIWrapper
diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt
index 55812fb2d..46a5fb9c1 100644
--- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt
@@ -1,138 +1,52 @@
-=== Running mpiexec -n 3 /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]]
stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -------------------------------------------------------------------- -Build info: - - Built time: May 3 2016 17:56:15 - Last modified date: Tue May 3 11:36:22 2016 - Build type: release - Build target: GPU - With 1bit-SGD: no - Math lib: acml - CUDA_PATH: /usr/local/cuda-7.5 - CUB_PATH: /usr/local/cub-1.4.1 - CUDNN_PATH: /usr/local/cudnn-4.0 - Build Branch: HEAD - Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 - Built by philly on 18750d26eb32 - Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -------------------------------------------------------------------- -------------------------------------------------------------------- -Build info: - - Built time: May 3 2016 17:56:15 - Last modified date: Tue May 3 11:36:22 2016 - Build type: release - Build target: GPU - With 1bit-SGD: no - Math lib: acml - CUDA_PATH: /usr/local/cuda-7.5 - CUB_PATH: /usr/local/cub-1.4.1 - CUDNN_PATH: /usr/local/cudnn-4.0 - Build Branch: HEAD - Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 - Built by philly on 18750d26eb32 - Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -------------------------------------------------------------------- -Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -------------------------------------------------------------------- -Build info: - - Built time: May 3 2016 17:56:15 - Last modified date: Tue May 3 11:36:22 2016 - Build type: release - Build target: GPU - With 1bit-SGD: no - Math lib: acml - CUDA_PATH: /usr/local/cuda-7.5 - CUB_PATH: /usr/local/cub-1.4.1 - CUDNN_PATH: /usr/local/cudnn-4.0 - Build Branch: HEAD - Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 - Built by philly on 18750d26eb32 - Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -------------------------------------------------------------------- +=== Running mpiexec -n 3 /home/mluser/src/cplx_master/build/debug/bin/cntk configFile=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../cntk.config currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
DeviceId=0 numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr MPIWrapper: initializing MPI -Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data MPIWrapper: initializing MPI MPIWrapper: initializing MPI ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: all 3 nodes responded requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 1 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other +ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 0 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other +ping [requestnodes (before change)]: all 3 nodes responded +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) +ping [requestnodes (after change)]: 3 nodes pinging each other +ping [requestnodes (after change)]: all 3 nodes responded +mpihelper: we are cog 1 in a gearbox of 3 +ping [mpihelper]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded ping [mpihelper]: all 3 nodes responded ping [mpihelper]: all 3 nodes responded -05/03/2016 18:17:50: Redirecting stderr to file /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank0 -05/03/2016 18:17:50: Redirecting stderr to file /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank1 -05/03/2016 18:17:51: Redirecting stderr to file /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank2 --------------------------------------------------------------------------- -mpiexec has exited due to process rank 0 with PID 14269 on -node 87698aadbc9d exiting improperly. 
There are three reasons this could occur:
-
-1. this process did not call "init" before exiting, but others in
-the job did. This can cause a job to hang indefinitely while it waits
-for all processes to call "init". By rule, if one process calls "init",
-then ALL processes must call "init" prior to termination.
-
-2. this process called "init", but exited without calling "finalize".
-By rule, all processes that call "init" MUST call "finalize" prior to
-exiting or it will be considered an "abnormal termination"
-
-3. this process called "MPI_Abort" or "orte_abort" and the mca parameter
-orte_create_session_dirs is set to false. In this case, the run-time cannot
-detect that the abort call was an abnormal termination. Hence, the only
-error message you will receive is this one.
-
-This may have caused other processes in the application to be
-terminated by signals sent by mpiexec (as reported here).
-
-You can avoid this message by specifying -quiet on the mpiexec command line.
-
--------------------------------------------------------------------------
-MPI Rank 0: 05/03/2016 18:17:50: -------------------------------------------------------------------
-MPI Rank 0: 05/03/2016 18:17:50: Build info: 
+Redirecting stderr to file /tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr_speechTrain.logrank0
+Redirecting stderr to file /tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr_speechTrain.logrank1
+Redirecting stderr to file /tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr_speechTrain.logrank2
+MPI Rank 0: -------------------------------------------------------------------
+MPI Rank 0: Build info: 
 MPI Rank 0: 
-MPI Rank 0: 05/03/2016 18:17:50: Built time: May 3 2016 17:56:15
-MPI Rank 0: 05/03/2016 18:17:50: Last modified date: Tue May 3 11:36:22 2016
-MPI Rank 0: 05/03/2016 18:17:50: Build type: release
-MPI Rank 0: 05/03/2016 18:17:50: Build target: GPU
-MPI Rank 0: 05/03/2016 18:17:50: With 1bit-SGD: no
-MPI Rank 0: 05/03/2016 18:17:50: Math lib: acml
-MPI Rank 0: 05/03/2016 18:17:50: CUDA_PATH: /usr/local/cuda-7.5
-MPI Rank 0: 05/03/2016 18:17:50: CUB_PATH: /usr/local/cub-1.4.1
-MPI Rank 0: 05/03/2016 18:17:50: CUDNN_PATH: /usr/local/cudnn-4.0
-MPI Rank 0: 05/03/2016 18:17:50: Build Branch: HEAD
-MPI Rank 0: 05/03/2016 18:17:50: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82
-MPI Rank 0: 05/03/2016 18:17:50: Built by philly on 18750d26eb32
-MPI Rank 0: 05/03/2016 18:17:50: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-MPI Rank 0: 05/03/2016 18:17:50: -------------------------------------------------------------------
+MPI Rank 0: Built time: Jan 6 2016 19:01:02
+MPI Rank 0: Last modified date: Tue Jan 5 10:37:19 2016
+MPI Rank 0: Build type: debug
+MPI Rank 0: Math lib: acml
+MPI Rank 0: CUDA_PATH: /usr/local/cuda-7.0
+MPI Rank 0: CUB_PATH: /usr/local/cub-1.4.1
+MPI Rank 0: Build Branch: master
+MPI Rank 0: Build SHA1: f88156c7f48e6418e0e5e2998e159c54aaca3c1d
+MPI Rank 0: -------------------------------------------------------------------
+MPI Rank 0: running on localhost at 2016/01/06 23:25:24
+MPI Rank 0: command line: 
+MPI Rank 0: /home/mluser/src/cplx_master/build/debug/bin/cntk configFile=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../cntk.config
currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. DeviceId=0 numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: Running on localhost at 2016/05/03 18:17:50 -MPI Rank 0: 05/03/2016 18:17:50: Command line: -MPI Rank 0: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:17:50: precision = "float" +MPI Rank 0: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -170,7 +84,7 @@ MPI Rank 0: CE = if trainingCriterion == 'CE' MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 0: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 0: logPrior = LogPrior(labels) MPI Rank 0: // TODO: how to add a tag to an infix 
operation? @@ -221,34 +135,30 @@ MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu +MPI Rank 0: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. MPI Rank 0: DeviceId=0 -MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] -MPI Rank 0: numCPUThreads=8 +MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 18:17:50: precision = "float" +MPI Rank 0: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = 0 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -280,7 +190,7 @@ MPI Rank 0: CE = if trainingCriterion == 'CE' MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) 
MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 0: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 0: logPrior = LogPrior(labels) MPI Rank 0: // TODO: how to add a tag to an infix operation? @@ -324,47 +234,42 @@ MPI Rank 0: type = "real" MPI Rank 0: scpFile = "glob_0000.scp" MPI Rank 0: ] MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 0: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 0: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 0: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu +MPI Rank 0: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
MPI Rank 0: DeviceId=0 -MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] -MPI Rank 0: numCPUThreads=8 +MPI Rank 0: numCPUThreads=2 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 0: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 0: configparameters: cntk.cntk:deviceId=0 -MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ +MPI Rank 0: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: configparameters: cntk.config:command=speechTrain +MPI Rank 0: configparameters: cntk.config:ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
+MPI Rank 0: configparameters: cntk.config:currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: configparameters: cntk.config:DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 0: configparameters: cntk.config:deviceId=0 +MPI Rank 0: configparameters: cntk.config:numCPUThreads=2 +MPI Rank 0: configparameters: cntk.config:parallelTrain=true +MPI Rank 0: configparameters: cntk.config:precision=double +MPI Rank 0: configparameters: cntk.config:RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu +MPI Rank 0: configparameters: cntk.config:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -396,7 +301,7 @@ MPI Rank 0: CE = if trainingCriterion == 'CE' MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 0: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 0: logPrior = LogPrior(labels) MPI Rank 0: // TODO: how to add a tag to an infix operation? @@ -440,295 +345,564 @@ MPI Rank 0: type = "real" MPI Rank 0: scpFile = "glob_0000.scp" MPI Rank 0: ] MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 0: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 0: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 18:17:50: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 18:17:50: Commands: speechTrain -MPI Rank 0: 05/03/2016 18:17:50: Precision = "double" -MPI Rank 0: 05/03/2016 18:17:50: Using 8 CPU threads. 
-MPI Rank 0: 05/03/2016 18:17:50: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 18:17:50: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 0: 05/03/2016 18:17:50: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: ############################################################################## -MPI Rank 0: 05/03/2016 18:17:50: # # -MPI Rank 0: 05/03/2016 18:17:50: # Action "train" # -MPI Rank 0: 05/03/2016 18:17:50: # # -MPI Rank 0: 05/03/2016 18:17:50: ############################################################################## -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: configparameters: cntk.config:stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr +MPI Rank 0: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: command: speechTrain +MPI Rank 0: precision = double +MPI Rank 0: Using 2 CPU threads +MPI Rank 0: CNTKModelPath: /tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/models/cntkSpeech.dnn +MPI Rank 0: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 0: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 0: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 0: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list -MPI Rank 0: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: Creating virgin network. +MPI Rank 0: reading script file glob_0000.scp ... 948 entries +MPI Rank 0: total 132 state names in state list /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list +MPI Rank 0: htkmlfreader: reading MLF file /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 0: MPI Rank 0: Post-processing network... 
MPI Rank 0: MPI Rank 0: 7 roots: -MPI Rank 0: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 0: EvalErrorPrediction = ErrorPrediction() -MPI Rank 0: InvStdOfFeatures = InvStdDev() -MPI Rank 0: MeanOfFeatures = Mean() -MPI Rank 0: PosteriorProb = Softmax() -MPI Rank 0: Prior = Mean() -MPI Rank 0: ScaledLogLikelihood = Minus() -MPI Rank 0: -MPI Rank 0: Validating network. 25 nodes to process in pass 1. -MPI Rank 0: -MPI Rank 0: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 0: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 0: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 0: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 0: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 0: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 0: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 0: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 0: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 0: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 0: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 0: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 0: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 0: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 0: Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 0: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 0: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 0: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 0: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 0: -MPI Rank 0: Validating network. 17 nodes to process in pass 2. 
+MPI Rank 0: MeanOfFeatures = Mean +MPI Rank 0: InvStdOfFeatures = InvStdDev +MPI Rank 0: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: Prior = Mean +MPI Rank 0: ScaledLogLikelihood = Minus +MPI Rank 0: PosteriorProb = Softmax +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for MeanOfFeatures Mean operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for InvStdOfFeatures InvStdDev operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for CrossEntropyWithSoftmax CrossEntropyWithSoftmax operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for EvalErrorPrediction ErrorPrediction operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for Prior Mean operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for ScaledLogLikelihood Minus operation +MPI Rank 0: FormNestedNetwork: WARNING: Was called twice for PosteriorProb Softmax operation MPI Rank 0: MPI Rank 0: -MPI Rank 0: Validating network, final pass. +MPI Rank 0: Validating for node MeanOfFeatures. 2 nodes to process in pass 1. +MPI Rank 0: +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node MeanOfFeatures. 1 nodes to process in pass 2. +MPI Rank 0: +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node MeanOfFeatures, final verification. +MPI Rank 0: +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: 1 out of 2 nodes do not share the minibatch layout with the input data. MPI Rank 0: MPI Rank 0: +MPI Rank 0: Validating for node InvStdOfFeatures. 2 nodes to process in pass 1. MPI Rank 0: -MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input data. +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node InvStdOfFeatures. 1 nodes to process in pass 2. +MPI Rank 0: +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node InvStdOfFeatures, final verification. +MPI Rank 0: +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: +MPI Rank 0: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 0: +MPI Rank 0: +MPI Rank 0: Validating for node CrossEntropyWithSoftmax. 20 nodes to process in pass 1. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node CrossEntropyWithSoftmax. 10 nodes to process in pass 2. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node CrossEntropyWithSoftmax, final verification. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 0: +MPI Rank 0: +MPI Rank 0: Validating for node EvalErrorPrediction. 20 nodes to process in pass 1. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node EvalErrorPrediction. 9 nodes to process in pass 2. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node EvalErrorPrediction, final verification. 
+MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 0: +MPI Rank 0: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 0: +MPI Rank 0: +MPI Rank 0: Validating for node Prior. 2 nodes to process in pass 1. +MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node Prior. 1 nodes to process in pass 2. +MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: +MPI Rank 0: Validating for node Prior, final verification. +MPI Rank 0: +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: +MPI Rank 0: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 0: +MPI Rank 0: +MPI Rank 0: Validating for node ScaledLogLikelihood. 22 nodes to process in pass 1. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: Validating for node ScaledLogLikelihood. 10 nodes to process in pass 2. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: Validating for node ScaledLogLikelihood, final verification. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: 10 out of 22 nodes do not share the minibatch layout with the input data. +MPI Rank 0: +MPI Rank 0: +MPI Rank 0: Validating for node PosteriorProb. 19 nodes to process in pass 1. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: Validating for node PosteriorProb. 9 nodes to process in pass 2. 
+MPI Rank 0: +MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 0: +MPI Rank 0: Validating for node PosteriorProb, final verification. 
+MPI Rank 0:
+MPI Rank 0: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512]
+MPI Rank 0: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512]
+MPI Rank 0: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363]
+MPI Rank 0: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0]
+MPI Rank 0: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1]
+MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1]
+MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0]
+MPI Rank 0: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0]
+MPI Rank 0: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1]
+MPI Rank 0: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0]
+MPI Rank 0: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0]
+MPI Rank 0: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0]
+MPI Rank 0: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1]
+MPI Rank 0: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0]
+MPI Rank 0: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0]
+MPI Rank 0: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0]
+MPI Rank 0: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1]
+MPI Rank 0: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0]
+MPI Rank 0: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0]
+MPI Rank 0:
+MPI Rank 0: 8 out of 19 nodes do not share the minibatch layout with the input data.
MPI Rank 0:
MPI Rank 0: Post-processing network complete.
MPI Rank 0:
-MPI Rank 0: 05/03/2016 18:17:50: Created model with 25 nodes on GPU 0.
+MPI Rank 0: SGD using GPU 0.
MPI Rank 0:
-MPI Rank 0: 05/03/2016 18:17:50: Training criterion node(s):
-MPI Rank 0: 05/03/2016 18:17:50: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
+MPI Rank 0: Training criterion node(s):
+MPI Rank 0: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
MPI Rank 0:
-MPI Rank 0: 05/03/2016 18:17:50: Evaluation criterion node(s):
-MPI Rank 0:
-MPI Rank 0: 05/03/2016 18:17:50: EvalErrorPrediction = ErrorPrediction
+MPI Rank 0: Evaluation criterion node(s):
+MPI Rank 0: EvalErrorPrediction = ErrorPrediction
MPI Rank 0:
MPI Rank 0:
MPI Rank 0: Allocating matrices for forward and/or backward propagation.
MPI Rank 0:
-MPI Rank 0: Memory Sharing Structure:
+MPI Rank 0: Precomputing --> 3 PreCompute nodes found.
MPI Rank 0: -MPI Rank 0: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 0x2a87668: {[features Value[363 x *]] } -MPI Rank 0: 0x2e36ad8: {[W0 Value[512 x 363]] } -MPI Rank 0: 0x2e45e18: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 0x2e46328: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 0x3792b78: {[B0 Value[512 x 1]] } -MPI Rank 0: 0x3794ce8: {[W1 Value[512 x 512]] } -MPI Rank 0: 0x3b492a8: {[B1 Value[512 x 1]] } -MPI Rank 0: 0x3b4a448: {[W2 Value[132 x 512]] } -MPI Rank 0: 0x3b4b0f8: {[B2 Value[132 x 1]] } -MPI Rank 0: 0x3b4bf28: {[labels Value[132 x *]] } -MPI Rank 0: 0x3b4d188: {[Prior Value[132]] } -MPI Rank 0: 0x3b52bb8: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 0x3b52d18: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 0x3b52ed8: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 0x3b53368: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 0x3b53498: {[LogOfPrior Value[132]] } -MPI Rank 0: 0x3b54bf8: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 0x3b553b8: {[W0*features Value[512 x *]] } -MPI Rank 0: 0x3b555c8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 0x3b55728: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 0x3b558e8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 0x3b55aa8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 0x3b55c68: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 0x3b55e28: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 0x3b56988: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 0x3b56b48: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 0x3b56d08: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 0x3b56ec8: {[B2 Gradient[132 x 1]] } +MPI Rank 0: NodeName: InvStdOfFeatures +MPI Rank 0: NodeName: MeanOfFeatures +MPI Rank 0: NodeName: Prior +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: +MPI Rank 0: Precomputing --> Completed. MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: Set Max Temp Mem Size For Convolution Nodes to 0 samples. +MPI Rank 0: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:50: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 18:17:50: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 18:17:50: Prior = Mean() +MPI Rank 0: Starting minibatch loop. 
+MPI Rank 0: #PLUS# +MPI Rank 0: Tensor Op: Op 15: 512 x 64 {1,512} op 512 x 1 {1,512} -> 512 x 64 {1,512} +MPI Rank 0: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 0: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 0: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 0: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 0: #NLop5# +MPI Rank 0: Tensor Op: Op 5: 512 x 64 {1,512} -> 512 x 64 {1,512} +MPI Rank 0: #PLUSBP# +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: * 640; CrossEntropyWithSoftmax = 4.40318406; EvalErrorPrediction = 0.90468750; TotalTime = 0.2407s; SamplesPerSecond = 2658.5 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: * 640; CrossEntropyWithSoftmax = 4.15980357; EvalErrorPrediction = 0.87187500; TotalTime = 0.2186s; SamplesPerSecond = 2927.8 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: * 640; CrossEntropyWithSoftmax = 3.98424210; EvalErrorPrediction = 0.87812500; TotalTime = 0.2176s; SamplesPerSecond = 2940.5 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: * 640; CrossEntropyWithSoftmax = 3.86209050; EvalErrorPrediction = 0.87656250; TotalTime = 0.2198s; SamplesPerSecond = 2912.0 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: * 640; CrossEntropyWithSoftmax = 3.80597620; EvalErrorPrediction = 0.88593750; TotalTime = 0.2186s; SamplesPerSecond = 2928.4 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: * 640; CrossEntropyWithSoftmax = 3.73511552; EvalErrorPrediction = 0.87812500; TotalTime = 0.2156s; SamplesPerSecond = 2969.0 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: * 640; CrossEntropyWithSoftmax = 3.57260725; EvalErrorPrediction = 0.81875000; TotalTime = 0.2155s; SamplesPerSecond = 2969.9 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: * 640; CrossEntropyWithSoftmax = 3.42293687; EvalErrorPrediction = 0.80468750; TotalTime = 0.2172s; SamplesPerSecond = 2946.1 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: * 640; CrossEntropyWithSoftmax = 3.34304309; EvalErrorPrediction = 0.76718750; TotalTime = 0.2175s; SamplesPerSecond = 2942.6 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: * 640; CrossEntropyWithSoftmax = 3.37037793; EvalErrorPrediction = 0.84687500; TotalTime = 0.2167s; SamplesPerSecond = 2953.8 +MPI Rank 0: WARNING: The same matrix with dim [1, 1] has been transferred between different devices for 20 times. 
+MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: * 640; CrossEntropyWithSoftmax = 3.21606065; EvalErrorPrediction = 0.76093750; TotalTime = 0.2119s; SamplesPerSecond = 3020.1 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: * 640; CrossEntropyWithSoftmax = 3.31610118; EvalErrorPrediction = 0.78437500; TotalTime = 0.2171s; SamplesPerSecond = 2947.4 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: * 640; CrossEntropyWithSoftmax = 3.14285888; EvalErrorPrediction = 0.75000000; TotalTime = 0.2174s; SamplesPerSecond = 2944.2 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: * 640; CrossEntropyWithSoftmax = 3.01821991; EvalErrorPrediction = 0.70937500; TotalTime = 0.2179s; SamplesPerSecond = 2936.9 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: * 640; CrossEntropyWithSoftmax = 3.01218944; EvalErrorPrediction = 0.73906250; TotalTime = 0.2171s; SamplesPerSecond = 2947.3 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: * 640; CrossEntropyWithSoftmax = 2.98947652; EvalErrorPrediction = 0.73593750; TotalTime = 0.2173s; SamplesPerSecond = 2945.4 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: * 640; CrossEntropyWithSoftmax = 2.86297716; EvalErrorPrediction = 0.70000000; TotalTime = 0.2174s; SamplesPerSecond = 2943.7 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: * 640; CrossEntropyWithSoftmax = 2.71901077; EvalErrorPrediction = 0.68593750; TotalTime = 0.2176s; SamplesPerSecond = 2941.6 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: * 640; CrossEntropyWithSoftmax = 2.80860596; EvalErrorPrediction = 0.71250000; TotalTime = 0.2169s; SamplesPerSecond = 2951.2 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: * 640; CrossEntropyWithSoftmax = 2.60590434; EvalErrorPrediction = 0.64687500; TotalTime = 0.2173s; SamplesPerSecond = 2945.1 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: * 640; CrossEntropyWithSoftmax = 2.63920069; EvalErrorPrediction = 0.66875000; TotalTime = 0.2176s; SamplesPerSecond = 2940.6 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: * 640; CrossEntropyWithSoftmax = 2.58372597; EvalErrorPrediction = 0.65781250; TotalTime = 0.2181s; SamplesPerSecond = 2934.9 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: * 640; CrossEntropyWithSoftmax = 2.50997096; EvalErrorPrediction = 0.62031250; TotalTime = 0.2169s; SamplesPerSecond = 2950.2 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: * 640; CrossEntropyWithSoftmax = 2.42126950; EvalErrorPrediction = 0.62968750; TotalTime = 0.2186s; SamplesPerSecond = 2928.3 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: * 640; CrossEntropyWithSoftmax = 2.40125789; EvalErrorPrediction = 0.65156250; TotalTime = 0.2086s; SamplesPerSecond = 3067.4 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: * 640; CrossEntropyWithSoftmax = 2.47110816; EvalErrorPrediction = 0.63281250; TotalTime = 0.2128s; SamplesPerSecond = 3007.0 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: * 640; CrossEntropyWithSoftmax = 2.33215267; EvalErrorPrediction = 0.60312500; TotalTime = 0.2160s; SamplesPerSecond = 2963.6 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: * 640; CrossEntropyWithSoftmax = 2.21936103; EvalErrorPrediction = 0.56875000; TotalTime = 0.2164s; SamplesPerSecond = 2957.6 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: * 640; CrossEntropyWithSoftmax = 2.31959580; EvalErrorPrediction = 0.61093750; TotalTime = 0.2164s; SamplesPerSecond = 2958.1 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: 
* 640; CrossEntropyWithSoftmax = 2.19592881; EvalErrorPrediction = 0.61718750; TotalTime = 0.2140s; SamplesPerSecond = 2991.0 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: * 640; CrossEntropyWithSoftmax = 2.28411654; EvalErrorPrediction = 0.60000000; TotalTime = 0.2051s; SamplesPerSecond = 3120.6 +MPI Rank 0: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: * 640; CrossEntropyWithSoftmax = 2.18307184; EvalErrorPrediction = 0.55781250; TotalTime = 0.2171s; SamplesPerSecond = 2948.2 +MPI Rank 0: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568; EvalErrorPrediction = 0.72426758; learningRatePerSample = 0.015625; EpochTime=7.00504 +MPI Rank 0: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:53: Precomputing --> Completed. +MPI Rank 0: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Actual gradient aggregation time: 0.011961 +MPI Rank 0: Async gradient aggregation wait time: 0.000904 +MPI Rank 0: Actual gradient aggregation time: 0.033022 +MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: * 2304; CrossEntropyWithSoftmax = 2.08990053; EvalErrorPrediction = 0.56640625; TotalTime = 0.2889s; SamplesPerSecond = 7976.0 +MPI Rank 0: Async gradient aggregation wait time: 0.005171 +MPI Rank 0: Actual gradient aggregation time: 0.02443 +MPI Rank 0: Async gradient aggregation wait time: 7e-06 +MPI Rank 0: Actual gradient aggregation time: 0.026637 +MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: * 2560; CrossEntropyWithSoftmax = 2.16003887; EvalErrorPrediction = 0.58476562; TotalTime = 0.2810s; SamplesPerSecond = 9111.7 +MPI Rank 0: Async gradient aggregation wait time: 0.002957 +MPI Rank 0: Actual gradient aggregation time: 0.029982 +MPI Rank 0: Async gradient aggregation wait time: 1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.017448 +MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: * 2560; CrossEntropyWithSoftmax = 2.19985756; EvalErrorPrediction = 0.59492188; TotalTime = 0.2801s; SamplesPerSecond = 9141.1 +MPI Rank 0: Async gradient aggregation wait time: 7e-06 +MPI Rank 0: Actual gradient aggregation time: 0.016532 +MPI Rank 0: Async gradient aggregation wait time: 0.003627 +MPI Rank 0: Actual gradient aggregation time: 0.026545 +MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: * 2560; CrossEntropyWithSoftmax = 2.12388714; EvalErrorPrediction = 0.57968750; TotalTime = 0.2753s; SamplesPerSecond = 9298.5 +MPI Rank 0: Async gradient aggregation wait time: 7e-06 +MPI Rank 0: Actual gradient aggregation time: 0.028147 +MPI Rank 0: Async gradient aggregation wait time: 0.001084 +MPI Rank 0: Actual gradient aggregation time: 0.010457 +MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: * 2560; CrossEntropyWithSoftmax = 2.05908444; EvalErrorPrediction = 0.57070312; TotalTime = 0.2835s; SamplesPerSecond = 9029.2 +MPI Rank 0: Async gradient aggregation wait time: 0.000789 +MPI Rank 0: Actual gradient aggregation time: 0.031524 +MPI Rank 0: Async gradient aggregation wait time: 0.002736 +MPI Rank 0: Actual gradient aggregation time: 0.019029 +MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: * 2560; CrossEntropyWithSoftmax = 2.13603725; 
EvalErrorPrediction = 0.57070312; TotalTime = 0.2996s; SamplesPerSecond = 8545.9 +MPI Rank 0: Async gradient aggregation wait time: 7e-06 +MPI Rank 0: Actual gradient aggregation time: 0.012875 +MPI Rank 0: Async gradient aggregation wait time: 7e-06 +MPI Rank 0: Actual gradient aggregation time: 0.008355 +MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: * 2560; CrossEntropyWithSoftmax = 2.09094421; EvalErrorPrediction = 0.56406250; TotalTime = 0.2910s; SamplesPerSecond = 8797.6 +MPI Rank 0: Async gradient aggregation wait time: 0.007399 +MPI Rank 0: Actual gradient aggregation time: 0.030404 +MPI Rank 0: Async gradient aggregation wait time: 0.002218 +MPI Rank 0: Actual gradient aggregation time: 0.031144 +MPI Rank 0: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: * 2560; CrossEntropyWithSoftmax = 2.02829896; EvalErrorPrediction = 0.56210938; TotalTime = 0.3025s; SamplesPerSecond = 8461.9 +MPI Rank 0: Async gradient aggregation wait time: 0.00443 +MPI Rank 0: Actual gradient aggregation time: 0.013037 +MPI Rank 0: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10928672; EvalErrorPrediction = 0.57392578; learningRatePerSample = 0.001953125; EpochTime=2.33368 +MPI Rank 0: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: +MPI Rank 0: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Async gradient aggregation wait time: 1.3e-05 +MPI Rank 0: Actual gradient aggregation time: 0.035052 +MPI Rank 0: Async gradient aggregation wait time: 8e-06 +MPI Rank 0: Actual gradient aggregation time: 0.039993 +MPI Rank 0: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 1.99429576; EvalErrorPrediction = 0.54709201; TotalTime = 0.5249s; SamplesPerSecond = 17558.8 +MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 +MPI Rank 0: Actual gradient aggregation time: 0.03286 +MPI Rank 0: Async gradient aggregation wait time: 0.007913 +MPI Rank 0: Actual gradient aggregation time: 0.036843 +MPI Rank 0: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.92530640; EvalErrorPrediction = 0.52812500; TotalTime = 0.5521s; SamplesPerSecond = 18546.6 +MPI Rank 0: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.958861; EvalErrorPrediction = 0.53725586; learningRatePerSample = 9.7656251e-05; EpochTime=1.12898 +MPI Rank 0: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:54: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:54: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.1995s; samplesPerSecond = 3207.7 -MPI Rank 0: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0954s; samplesPerSecond = 6706.8 -MPI Rank 0: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6688.9 -MPI Rank 0: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0957s; samplesPerSecond = 6684.8 -MPI Rank 0: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0958s; samplesPerSecond = 6683.9 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6685.0 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0957s; samplesPerSecond = 6688.8 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0958s; samplesPerSecond = 6683.7 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0958s; samplesPerSecond = 6680.2 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0960s; samplesPerSecond = 6666.6 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0954s; samplesPerSecond = 6706.6 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0956s; samplesPerSecond = 6697.2 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0955s; samplesPerSecond = 6702.7 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0950s; samplesPerSecond = 6735.6 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0957s; samplesPerSecond = 6686.8 -MPI Rank 0: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0956s; samplesPerSecond = 6691.9 -MPI Rank 0: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0957s; samplesPerSecond = 6688.8 -MPI Rank 0: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0960s; samplesPerSecond = 6663.5 -MPI Rank 0: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0955s; samplesPerSecond = 6702.1 -MPI Rank 0: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0953s; samplesPerSecond = 6712.5 -MPI Rank 0: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0954s; samplesPerSecond = 6706.9 -MPI Rank 0: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0951s; samplesPerSecond = 6731.2 -MPI Rank 0: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0957s; samplesPerSecond = 6688.5 -MPI Rank 0: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0961s; samplesPerSecond = 6660.2 -MPI Rank 0: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0955s; samplesPerSecond = 6704.0 -MPI Rank 0: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0954s; samplesPerSecond = 6708.7 -MPI Rank 0: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0940s; samplesPerSecond = 6806.6 -MPI Rank 0: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0950s; samplesPerSecond = 6739.8 -MPI Rank 0: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0950s; samplesPerSecond = 6736.4 -MPI Rank 0: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0949s; samplesPerSecond = 6743.7 -MPI Rank 0: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0949s; samplesPerSecond = 6743.4 -MPI Rank 0: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0720s; samplesPerSecond = 8889.6 -MPI Rank 0: 05/03/2016 18:17:57: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.1828s -MPI Rank 0: 05/03/2016 18:17:57: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.1' -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:57: Starting Epoch 2: learning rate per sample = 
0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:57: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.012992 -MPI Rank 0: Async gradient aggregation wait time: 0.004643 -MPI Rank 0: Actual gradient aggregation time: 0.007964 -MPI Rank 0: 05/03/2016 18:17:57: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08990053 * 2304; EvalErrorPrediction = 0.56640625 * 2304; time = 0.1340s; samplesPerSecond = 17197.5 -MPI Rank 0: Async gradient aggregation wait time: 0.010201 -MPI Rank 0: Actual gradient aggregation time: 0.015208 -MPI Rank 0: Async gradient aggregation wait time: 0.004597 -MPI Rank 0: Actual gradient aggregation time: 0.014249 -MPI Rank 0: 05/03/2016 18:17:57: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.16003887 * 2560; EvalErrorPrediction = 0.58476562 * 2560; time = 0.1482s; samplesPerSecond = 17268.5 -MPI Rank 0: Async gradient aggregation wait time: 0.004595 -MPI Rank 0: Actual gradient aggregation time: 0.012777 -MPI Rank 0: Async gradient aggregation wait time: 0.004582 -MPI Rank 0: Actual gradient aggregation time: 0.015254 -MPI Rank 0: 05/03/2016 18:17:57: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.19985756 * 2560; EvalErrorPrediction = 0.59492188 * 2560; time = 0.1433s; samplesPerSecond = 17865.7 -MPI Rank 0: Async gradient aggregation wait time: 0.004371 -MPI Rank 0: Actual gradient aggregation time: 0.01498 -MPI Rank 0: Async gradient aggregation wait time: 0.004524 -MPI Rank 0: Actual gradient aggregation time: 0.015111 -MPI Rank 0: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.12388714 * 2560; EvalErrorPrediction = 0.57968750 * 2560; time = 0.1492s; samplesPerSecond = 17160.5 -MPI Rank 0: Async gradient aggregation wait time: 0.005062 -MPI Rank 0: Actual gradient aggregation time: 0.015274 -MPI Rank 0: Async gradient aggregation wait time: 0.004494 -MPI Rank 0: Actual gradient aggregation time: 0.014957 -MPI Rank 0: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.05908444 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.1524s; samplesPerSecond = 16798.2 -MPI Rank 0: Async gradient aggregation wait time: 0.004484 -MPI Rank 0: Actual gradient aggregation time: 0.014571 -MPI Rank 0: Async gradient aggregation wait time: 0.004407 -MPI Rank 0: Actual gradient aggregation time: 0.015128 -MPI Rank 0: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.13603725 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.1515s; samplesPerSecond = 16892.9 -MPI Rank 0: Async gradient aggregation wait time: 0.00506 -MPI Rank 0: Actual gradient aggregation time: 0.013164 -MPI Rank 0: Async gradient aggregation wait time: 0.004542 -MPI Rank 0: Actual gradient aggregation time: 0.015448 -MPI Rank 0: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.09094421 * 2560; EvalErrorPrediction = 0.56406250 * 2560; time = 0.1505s; samplesPerSecond = 17014.0 -MPI Rank 0: Async gradient aggregation wait time: 0.00451 -MPI Rank 0: Actual gradient aggregation time: 0.015246 -MPI Rank 0: Async gradient aggregation wait time: 0.004543 -MPI Rank 0: Actual gradient aggregation time: 0.015079 -MPI Rank 0: 
05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.02829896 * 2560; EvalErrorPrediction = 0.56210938 * 2560; time = 0.1497s; samplesPerSecond = 17097.0 -MPI Rank 0: Async gradient aggregation wait time: 0.006283 -MPI Rank 0: Actual gradient aggregation time: 0.006489 -MPI Rank 0: 05/03/2016 18:17:58: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10928672 * 20480; EvalErrorPrediction = 0.57392578 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.19398s -MPI Rank 0: 05/03/2016 18:17:58: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.2' -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:58: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:58: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 0.00419 -MPI Rank 0: Actual gradient aggregation time: 0.02843 -MPI Rank 0: Async gradient aggregation wait time: 0.004076 -MPI Rank 0: Actual gradient aggregation time: 0.029096 -MPI Rank 0: 05/03/2016 18:17:59: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.99429576 * 9216; EvalErrorPrediction = 0.54709201 * 9216; time = 0.2694s; samplesPerSecond = 34204.1 -MPI Rank 0: Async gradient aggregation wait time: 0.002983 -MPI Rank 0: Actual gradient aggregation time: 0.028394 -MPI Rank 0: Async gradient aggregation wait time: 0.004046 -MPI Rank 0: Actual gradient aggregation time: 0.028312 -MPI Rank 0: 05/03/2016 18:17:59: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.92530640 * 10240; EvalErrorPrediction = 0.52812500 * 10240; time = 0.2601s; samplesPerSecond = 39366.0 -MPI Rank 0: 05/03/2016 18:17:59: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.95886100 * 20480; EvalErrorPrediction = 0.53725586 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.543814s -MPI Rank 0: 05/03/2016 18:17:59: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.3' -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:59: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: -MPI Rank 0: 05/03/2016 18:17:59: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 0: Async gradient aggregation wait time: 0.004651
-MPI Rank 0: Actual gradient aggregation time: 0.030131
-MPI Rank 0: Async gradient aggregation wait time: 0.006592
-MPI Rank 0: Actual gradient aggregation time: 0.024651
-MPI Rank 0: 05/03/2016 18:17:59: Epoch[ 4 of 4]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.90011431 * 9216; EvalErrorPrediction = 0.51725260 * 9216; time = 0.2713s; samplesPerSecond = 33974.9
-MPI Rank 0: Async gradient aggregation wait time: 0.004322
-MPI Rank 0: Actual gradient aggregation time: 0.029603
-MPI Rank 0: Async gradient aggregation wait time: 0.006214
-MPI Rank 0: Actual gradient aggregation time: 0.023129
-MPI Rank 0: 05/03/2016 18:17:59: Epoch[ 4 of 4]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.88429973 * 10240; EvalErrorPrediction = 0.52099609 * 10240; time = 0.2741s; samplesPerSecond = 37364.6
-MPI Rank 0: Async gradient aggregation wait time: 0.006246
-MPI Rank 0: 05/03/2016 18:17:59: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.89248911 * 20480; EvalErrorPrediction = 0.51933594 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=0.562062s
-MPI Rank 0: 05/03/2016 18:17:59: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn'
-MPI Rank 0: 05/03/2016 18:18:00: CNTKCommandTrainEnd: speechTrain
-MPI Rank 0:
-MPI Rank 0: 05/03/2016 18:18:00: Action "train" complete.
-MPI Rank 0:
-MPI Rank 0: 05/03/2016 18:18:00: __COMPLETED__
-MPI Rank 1: 05/03/2016 18:17:50: -------------------------------------------------------------------
-MPI Rank 1: 05/03/2016 18:17:50: Build info:
+MPI Rank 0: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: Async gradient aggregation wait time: 1.2e-05 +MPI Rank 0: Actual gradient aggregation time: 0.00691 +MPI Rank 0: Async gradient aggregation wait time: 1.2e-05 +MPI Rank 0: Actual gradient aggregation time: 0.007876 +MPI Rank 0: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 1.90011431; EvalErrorPrediction = 0.51725260; TotalTime = 0.5175s; SamplesPerSecond = 17807.9 +MPI Rank 0: Async gradient aggregation wait time: 1.2e-05 +MPI Rank 0: Actual gradient aggregation time: 0.032879 +MPI Rank 0: Async gradient aggregation wait time: 0.006177 +MPI Rank 0: Actual gradient aggregation time: 0.055069 +MPI Rank 0: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.88429973; EvalErrorPrediction = 0.52099609; TotalTime = 0.5251s; SamplesPerSecond = 19499.9 +MPI Rank 0: Async gradient aggregation wait time: 0.004648 +MPI Rank 0: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.89248911; EvalErrorPrediction = 0.51933594; learningRatePerSample = 9.7656251e-05; EpochTime=1.10761 +MPI Rank 0: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: __COMPLETED__ +MPI Rank 0: ~MPIWrapper +MPI Rank 1: ------------------------------------------------------------------- +MPI Rank 1: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: Built time: May 3 2016 17:56:15 -MPI Rank 1: 05/03/2016 18:17:50: Last modified date: Tue May 3 11:36:22 2016 -MPI Rank 1: 05/03/2016 18:17:50: Build type: release -MPI Rank 1: 05/03/2016 18:17:50: Build target: GPU -MPI Rank 1: 05/03/2016 18:17:50: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 18:17:50: Math lib: acml -MPI Rank 1: 05/03/2016 18:17:50: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 1: 05/03/2016 18:17:50: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 1: 05/03/2016 18:17:50: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 1: 05/03/2016 18:17:50: Build Branch: HEAD -MPI Rank 1: 05/03/2016 18:17:50: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 1: 05/03/2016 18:17:50: Built by philly on 18750d26eb32 -MPI Rank 1: 05/03/2016 18:17:50: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 1: 05/03/2016 18:17:50: ------------------------------------------------------------------- +MPI Rank 1: Built time: Jan 6 2016 19:01:02 +MPI Rank 1: Last modified date: Tue Jan 5 10:37:19 2016 +MPI Rank 1: Build type: debug +MPI Rank 1: Math lib: acml +MPI Rank 1: CUDA_PATH: /usr/local/cuda-7.0 +MPI Rank 1: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 1: Build Branch: master +MPI Rank 1: Build SHA1: f88156c7f48e6418e0e5e2998e159c54aaca3c1d +MPI Rank 1: ------------------------------------------------------------------- +MPI Rank 1: running on localhost at 2016/01/06 23:25:25 +MPI Rank 1: command line: +MPI Rank 1: /home/mluser/src/cplx_master/build/debug/bin/cntk configFile=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../cntk.config currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
DeviceId=0 numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: Running on localhost at 2016/05/03 18:17:50 -MPI Rank 1: 05/03/2016 18:17:50: Command line: -MPI Rank 1: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:17:50: precision = "float" +MPI Rank 1: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -766,7 +940,7 @@ MPI Rank 1: CE = if trainingCriterion == 'CE' MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 1: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 1: logPrior = LogPrior(labels) MPI Rank 1: // TODO: how to add a tag to an infix operation? 
@@ -817,34 +991,30 @@ MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu +MPI Rank 1: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. MPI Rank 1: DeviceId=0 -MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] -MPI Rank 1: numCPUThreads=8 +MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 18:17:50: precision = "float" +MPI Rank 1: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = 0 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -876,7 +1046,7 @@ MPI Rank 1: CE = if trainingCriterion == 'CE' MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 
1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 1: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 1: logPrior = LogPrior(labels) MPI Rank 1: // TODO: how to add a tag to an infix operation? @@ -920,47 +1090,42 @@ MPI Rank 1: type = "real" MPI Rank 1: scpFile = "glob_0000.scp" MPI Rank 1: ] MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 1: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 1: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 1: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu +MPI Rank 1: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
MPI Rank 1: DeviceId=0 -MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] -MPI Rank 1: numCPUThreads=8 +MPI Rank 1: numCPUThreads=2 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 1: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 1: configparameters: cntk.cntk:deviceId=0 -MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ +MPI Rank 1: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: configparameters: cntk.config:command=speechTrain +MPI Rank 1: configparameters: cntk.config:ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
+MPI Rank 1: configparameters: cntk.config:currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: configparameters: cntk.config:DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 1: configparameters: cntk.config:deviceId=0 +MPI Rank 1: configparameters: cntk.config:numCPUThreads=2 +MPI Rank 1: configparameters: cntk.config:parallelTrain=true +MPI Rank 1: configparameters: cntk.config:precision=double +MPI Rank 1: configparameters: cntk.config:RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu +MPI Rank 1: configparameters: cntk.config:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -992,7 +1157,7 @@ MPI Rank 1: CE = if trainingCriterion == 'CE' MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 1: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 1: logPrior = LogPrior(labels) MPI Rank 1: // TODO: how to add a tag to an infix operation? @@ -1036,291 +1201,564 @@ MPI Rank 1: type = "real" MPI Rank 1: scpFile = "glob_0000.scp" MPI Rank 1: ] MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 1: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 1: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 18:17:50: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 18:17:50: Commands: speechTrain -MPI Rank 1: 05/03/2016 18:17:50: Precision = "double" -MPI Rank 1: 05/03/2016 18:17:50: Using 8 CPU threads. 
-MPI Rank 1: 05/03/2016 18:17:50: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 18:17:50: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 1: 05/03/2016 18:17:50: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: ############################################################################## -MPI Rank 1: 05/03/2016 18:17:50: # # -MPI Rank 1: 05/03/2016 18:17:50: # Action "train" # -MPI Rank 1: 05/03/2016 18:17:50: # # -MPI Rank 1: 05/03/2016 18:17:50: ############################################################################## -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: configparameters: cntk.config:stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr +MPI Rank 1: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: command: speechTrain +MPI Rank 1: precision = double +MPI Rank 1: Using 2 CPU threads +MPI Rank 1: CNTKModelPath: /tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/models/cntkSpeech.dnn +MPI Rank 1: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 1: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 1: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 1: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list -MPI Rank 1: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: Creating virgin network. +MPI Rank 1: reading script file glob_0000.scp ... 948 entries +MPI Rank 1: total 132 state names in state list /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list +MPI Rank 1: htkmlfreader: reading MLF file /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 1: MPI Rank 1: Post-processing network... 
MPI Rank 1: MPI Rank 1: 7 roots: -MPI Rank 1: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 1: EvalErrorPrediction = ErrorPrediction() -MPI Rank 1: InvStdOfFeatures = InvStdDev() -MPI Rank 1: MeanOfFeatures = Mean() -MPI Rank 1: PosteriorProb = Softmax() -MPI Rank 1: Prior = Mean() -MPI Rank 1: ScaledLogLikelihood = Minus() -MPI Rank 1: -MPI Rank 1: Validating network. 25 nodes to process in pass 1. -MPI Rank 1: -MPI Rank 1: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 1: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 1: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 1: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 1: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 1: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 1: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 1: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 1: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 1: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 1: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 1: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 1: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 1: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 1: Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 1: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 1: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 1: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 1: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 1: -MPI Rank 1: Validating network. 17 nodes to process in pass 2. 
+MPI Rank 1: MeanOfFeatures = Mean +MPI Rank 1: InvStdOfFeatures = InvStdDev +MPI Rank 1: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: Prior = Mean +MPI Rank 1: ScaledLogLikelihood = Minus +MPI Rank 1: PosteriorProb = Softmax +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for MeanOfFeatures Mean operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for InvStdOfFeatures InvStdDev operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for CrossEntropyWithSoftmax CrossEntropyWithSoftmax operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for EvalErrorPrediction ErrorPrediction operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for Prior Mean operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for ScaledLogLikelihood Minus operation +MPI Rank 1: FormNestedNetwork: WARNING: Was called twice for PosteriorProb Softmax operation MPI Rank 1: MPI Rank 1: -MPI Rank 1: Validating network, final pass. +MPI Rank 1: Validating for node MeanOfFeatures. 2 nodes to process in pass 1. +MPI Rank 1: +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node MeanOfFeatures. 1 nodes to process in pass 2. +MPI Rank 1: +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node MeanOfFeatures, final verification. +MPI Rank 1: +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: 1 out of 2 nodes do not share the minibatch layout with the input data. MPI Rank 1: MPI Rank 1: +MPI Rank 1: Validating for node InvStdOfFeatures. 2 nodes to process in pass 1. MPI Rank 1: -MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input data. +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node InvStdOfFeatures. 1 nodes to process in pass 2. +MPI Rank 1: +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node InvStdOfFeatures, final verification. +MPI Rank 1: +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: +MPI Rank 1: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 1: +MPI Rank 1: +MPI Rank 1: Validating for node CrossEntropyWithSoftmax. 20 nodes to process in pass 1. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node CrossEntropyWithSoftmax. 10 nodes to process in pass 2. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node CrossEntropyWithSoftmax, final verification. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 1: +MPI Rank 1: +MPI Rank 1: Validating for node EvalErrorPrediction. 20 nodes to process in pass 1. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node EvalErrorPrediction. 9 nodes to process in pass 2. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node EvalErrorPrediction, final verification. 
+MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 1: +MPI Rank 1: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 1: +MPI Rank 1: +MPI Rank 1: Validating for node Prior. 2 nodes to process in pass 1. +MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node Prior. 1 nodes to process in pass 2. +MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: +MPI Rank 1: Validating for node Prior, final verification. +MPI Rank 1: +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: +MPI Rank 1: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 1: +MPI Rank 1: +MPI Rank 1: Validating for node ScaledLogLikelihood. 22 nodes to process in pass 1. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: Validating for node ScaledLogLikelihood. 10 nodes to process in pass 2. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: Validating for node ScaledLogLikelihood, final verification. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: 10 out of 22 nodes do not share the minibatch layout with the input data. +MPI Rank 1: +MPI Rank 1: +MPI Rank 1: Validating for node PosteriorProb. 19 nodes to process in pass 1. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: Validating for node PosteriorProb. 9 nodes to process in pass 2. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: Validating for node PosteriorProb, final verification. 
+MPI Rank 1: +MPI Rank 1: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 1: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 1: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 1: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 1: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 1: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 1: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 1: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 1: +MPI Rank 1: 8 out of 19 nodes do not share the minibatch layout with the input data. MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: Created model with 25 nodes on GPU 0. +MPI Rank 1: SGD using GPU 0. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: Training criterion node(s): -MPI Rank 1: 05/03/2016 18:17:50: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: Training criterion node(s): +MPI Rank 1: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: Evaluation criterion node(s): -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: Evaluation criterion node(s): +MPI Rank 1: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. MPI Rank 1: -MPI Rank 1: Memory Sharing Structure: +MPI Rank 1: Precomputing --> 3 PreCompute nodes found. 
MPI Rank 1: -MPI Rank 1: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 0x2654e68: {[features Value[363 x *]] } -MPI Rank 1: 0x2ebc048: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 0x2ebd208: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 0x2ebdf38: {[W0 Value[512 x 363]] } -MPI Rank 1: 0x33e01f8: {[W1 Value[512 x 512]] } -MPI Rank 1: 0x33e0fc8: {[B1 Value[512 x 1]] } -MPI Rank 1: 0x33e2168: {[W2 Value[132 x 512]] } -MPI Rank 1: 0x33e2e18: {[B2 Value[132 x 1]] } -MPI Rank 1: 0x33e3c48: {[labels Value[132 x *]] } -MPI Rank 1: 0x33e4ea8: {[Prior Value[132]] } -MPI Rank 1: 0x33ea748: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 0x33eaa48: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 0x33eac08: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 0x33eb098: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 0x33eb208: {[LogOfPrior Value[132]] } -MPI Rank 1: 0x33f0808: {[B0 Value[512 x 1]] } -MPI Rank 1: 0x36cbed8: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 0x36cc698: {[W0*features Value[512 x *]] } -MPI Rank 1: 0x36cc8a8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 0x36cca08: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 0x36ccb68: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 0x36ccd28: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 0x36ccee8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 0x36cd0a8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 0x36cdc08: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 0x36cddc8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 0x36cdf88: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 0x36ce148: {[B2 Gradient[132 x 1]] } +MPI Rank 1: NodeName: InvStdOfFeatures +MPI Rank 1: NodeName: MeanOfFeatures +MPI Rank 1: NodeName: Prior +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: +MPI Rank 1: Precomputing --> Completed. MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: Set Max Temp Mem Size For Convolution Nodes to 0 samples. +MPI Rank 1: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:50: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 18:17:50: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 18:17:50: Prior = Mean() +MPI Rank 1: Starting minibatch loop. 
+MPI Rank 1: #PLUS# +MPI Rank 1: Tensor Op: Op 15: 512 x 64 {1,512} op 512 x 1 {1,512} -> 512 x 64 {1,512} +MPI Rank 1: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 1: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 1: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 1: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 1: #NLop5# +MPI Rank 1: Tensor Op: Op 5: 512 x 64 {1,512} -> 512 x 64 {1,512} +MPI Rank 1: #PLUSBP# +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: * 640; CrossEntropyWithSoftmax = 4.40318406; EvalErrorPrediction = 0.90468750; TotalTime = 0.2462s; SamplesPerSecond = 2599.7 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: * 640; CrossEntropyWithSoftmax = 4.15980357; EvalErrorPrediction = 0.87187500; TotalTime = 0.2190s; SamplesPerSecond = 2921.8 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: * 640; CrossEntropyWithSoftmax = 3.98424210; EvalErrorPrediction = 0.87812500; TotalTime = 0.2180s; SamplesPerSecond = 2935.2 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: * 640; CrossEntropyWithSoftmax = 3.86209050; EvalErrorPrediction = 0.87656250; TotalTime = 0.2181s; SamplesPerSecond = 2934.2 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: * 640; CrossEntropyWithSoftmax = 3.80597620; EvalErrorPrediction = 0.88593750; TotalTime = 0.2176s; SamplesPerSecond = 2941.4 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: * 640; CrossEntropyWithSoftmax = 3.73511552; EvalErrorPrediction = 0.87812500; TotalTime = 0.2156s; SamplesPerSecond = 2968.8 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: * 640; CrossEntropyWithSoftmax = 3.57260725; EvalErrorPrediction = 0.81875000; TotalTime = 0.2167s; SamplesPerSecond = 2952.8 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: * 640; CrossEntropyWithSoftmax = 3.42293687; EvalErrorPrediction = 0.80468750; TotalTime = 0.2171s; SamplesPerSecond = 2947.6 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: * 640; CrossEntropyWithSoftmax = 3.34304309; EvalErrorPrediction = 0.76718750; TotalTime = 0.2176s; SamplesPerSecond = 2940.9 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: * 640; CrossEntropyWithSoftmax = 3.37037793; EvalErrorPrediction = 0.84687500; TotalTime = 0.2165s; SamplesPerSecond = 2956.6 +MPI Rank 1: WARNING: The same matrix with dim [1, 1] has been transferred between different devices for 20 times. 
+MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: * 640; CrossEntropyWithSoftmax = 3.21606065; EvalErrorPrediction = 0.76093750; TotalTime = 0.2191s; SamplesPerSecond = 2920.8 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: * 640; CrossEntropyWithSoftmax = 3.31610118; EvalErrorPrediction = 0.78437500; TotalTime = 0.2175s; SamplesPerSecond = 2942.0 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: * 640; CrossEntropyWithSoftmax = 3.14285888; EvalErrorPrediction = 0.75000000; TotalTime = 0.2171s; SamplesPerSecond = 2947.5 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: * 640; CrossEntropyWithSoftmax = 3.01821991; EvalErrorPrediction = 0.70937500; TotalTime = 0.2173s; SamplesPerSecond = 2945.4 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: * 640; CrossEntropyWithSoftmax = 3.01218944; EvalErrorPrediction = 0.73906250; TotalTime = 0.2180s; SamplesPerSecond = 2936.2 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: * 640; CrossEntropyWithSoftmax = 2.98947652; EvalErrorPrediction = 0.73593750; TotalTime = 0.2171s; SamplesPerSecond = 2948.0 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: * 640; CrossEntropyWithSoftmax = 2.86297716; EvalErrorPrediction = 0.70000000; TotalTime = 0.2173s; SamplesPerSecond = 2944.6 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: * 640; CrossEntropyWithSoftmax = 2.71901077; EvalErrorPrediction = 0.68593750; TotalTime = 0.2170s; SamplesPerSecond = 2949.2 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: * 640; CrossEntropyWithSoftmax = 2.80860596; EvalErrorPrediction = 0.71250000; TotalTime = 0.2177s; SamplesPerSecond = 2939.5 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: * 640; CrossEntropyWithSoftmax = 2.60590434; EvalErrorPrediction = 0.64687500; TotalTime = 0.2173s; SamplesPerSecond = 2945.6 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: * 640; CrossEntropyWithSoftmax = 2.63920069; EvalErrorPrediction = 0.66875000; TotalTime = 0.2174s; SamplesPerSecond = 2943.4 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: * 640; CrossEntropyWithSoftmax = 2.58372597; EvalErrorPrediction = 0.65781250; TotalTime = 0.2175s; SamplesPerSecond = 2942.3 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: * 640; CrossEntropyWithSoftmax = 2.50997096; EvalErrorPrediction = 0.62031250; TotalTime = 0.2175s; SamplesPerSecond = 2942.3 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: * 640; CrossEntropyWithSoftmax = 2.42126950; EvalErrorPrediction = 0.62968750; TotalTime = 0.2186s; SamplesPerSecond = 2927.8 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: * 640; CrossEntropyWithSoftmax = 2.40125789; EvalErrorPrediction = 0.65156250; TotalTime = 0.2246s; SamplesPerSecond = 2849.6 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: * 640; CrossEntropyWithSoftmax = 2.47110816; EvalErrorPrediction = 0.63281250; TotalTime = 0.2172s; SamplesPerSecond = 2945.9 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: * 640; CrossEntropyWithSoftmax = 2.33215267; EvalErrorPrediction = 0.60312500; TotalTime = 0.2159s; SamplesPerSecond = 2963.7 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: * 640; CrossEntropyWithSoftmax = 2.21936103; EvalErrorPrediction = 0.56875000; TotalTime = 0.2164s; SamplesPerSecond = 2957.3 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: * 640; CrossEntropyWithSoftmax = 2.31959580; EvalErrorPrediction = 0.61093750; TotalTime = 0.2162s; SamplesPerSecond = 2959.8 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: 
* 640; CrossEntropyWithSoftmax = 2.19592881; EvalErrorPrediction = 0.61718750; TotalTime = 0.2250s; SamplesPerSecond = 2844.3 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: * 640; CrossEntropyWithSoftmax = 2.28411654; EvalErrorPrediction = 0.60000000; TotalTime = 0.2194s; SamplesPerSecond = 2916.8 +MPI Rank 1: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: * 640; CrossEntropyWithSoftmax = 2.18307184; EvalErrorPrediction = 0.55781250; TotalTime = 0.1922s; SamplesPerSecond = 3330.0 +MPI Rank 1: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568; EvalErrorPrediction = 0.72426758; learningRatePerSample = 0.015625; EpochTime=7.00464 +MPI Rank 1: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:53: Precomputing --> Completed. +MPI Rank 1: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Actual gradient aggregation time: 0.025686 +MPI Rank 1: Async gradient aggregation wait time: 0.004606 +MPI Rank 1: Actual gradient aggregation time: 0.033068 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: * 2304; CrossEntropyWithSoftmax = 2.08990053; EvalErrorPrediction = 0.56640625; TotalTime = 0.2998s; SamplesPerSecond = 7685.1 +MPI Rank 1: Async gradient aggregation wait time: 0.00059 +MPI Rank 1: Actual gradient aggregation time: 0.023269 +MPI Rank 1: Async gradient aggregation wait time: 7e-06 +MPI Rank 1: Actual gradient aggregation time: 0.00805 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: * 2560; CrossEntropyWithSoftmax = 2.16003887; EvalErrorPrediction = 0.58476562; TotalTime = 0.2786s; SamplesPerSecond = 9187.9 +MPI Rank 1: Async gradient aggregation wait time: 7e-06 +MPI Rank 1: Actual gradient aggregation time: 0.012697 +MPI Rank 1: Async gradient aggregation wait time: 6e-06 +MPI Rank 1: Actual gradient aggregation time: 0.014048 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: * 2560; CrossEntropyWithSoftmax = 2.19985756; EvalErrorPrediction = 0.59492188; TotalTime = 0.2718s; SamplesPerSecond = 9417.8 +MPI Rank 1: Async gradient aggregation wait time: 0.003797 +MPI Rank 1: Actual gradient aggregation time: 0.016977 +MPI Rank 1: Async gradient aggregation wait time: 7e-06 +MPI Rank 1: Actual gradient aggregation time: 0.008841 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: * 2560; CrossEntropyWithSoftmax = 2.12388714; EvalErrorPrediction = 0.57968750; TotalTime = 0.2784s; SamplesPerSecond = 9194.2 +MPI Rank 1: Async gradient aggregation wait time: 7e-06 +MPI Rank 1: Actual gradient aggregation time: 0.013979 +MPI Rank 1: Async gradient aggregation wait time: 7e-06 +MPI Rank 1: Actual gradient aggregation time: 0.0094 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: * 2560; CrossEntropyWithSoftmax = 2.05908444; EvalErrorPrediction = 0.57070312; TotalTime = 0.2847s; SamplesPerSecond = 8991.4 +MPI Rank 1: Async gradient aggregation wait time: 0.015021 +MPI Rank 1: Actual gradient aggregation time: 0.019205 +MPI Rank 1: Async gradient aggregation wait time: 0.002628 +MPI Rank 1: Actual gradient aggregation time: 0.020009 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: * 2560; CrossEntropyWithSoftmax = 2.13603725; EvalErrorPrediction = 
0.57070312; TotalTime = 0.2944s; SamplesPerSecond = 8695.6 +MPI Rank 1: Async gradient aggregation wait time: 0.01 +MPI Rank 1: Actual gradient aggregation time: 0.01446 +MPI Rank 1: Async gradient aggregation wait time: 0.003974 +MPI Rank 1: Actual gradient aggregation time: 0.031839 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: * 2560; CrossEntropyWithSoftmax = 2.09094421; EvalErrorPrediction = 0.56406250; TotalTime = 0.2953s; SamplesPerSecond = 8670.5 +MPI Rank 1: Async gradient aggregation wait time: 0.007178 +MPI Rank 1: Actual gradient aggregation time: 0.03041 +MPI Rank 1: Async gradient aggregation wait time: 7e-06 +MPI Rank 1: Actual gradient aggregation time: 0.017581 +MPI Rank 1: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: * 2560; CrossEntropyWithSoftmax = 2.02829896; EvalErrorPrediction = 0.56210938; TotalTime = 0.3019s; SamplesPerSecond = 8478.7 +MPI Rank 1: Async gradient aggregation wait time: 0.005355 +MPI Rank 1: Actual gradient aggregation time: 0.022981 +MPI Rank 1: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10928672; EvalErrorPrediction = 0.57392578; learningRatePerSample = 0.001953125; EpochTime=2.33933 +MPI Rank 1: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: +MPI Rank 1: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Async gradient aggregation wait time: 1.1e-05 +MPI Rank 1: Actual gradient aggregation time: 0.009509 +MPI Rank 1: Async gradient aggregation wait time: 1.2e-05 +MPI Rank 1: Actual gradient aggregation time: 0.006755 +MPI Rank 1: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 1.99429576; EvalErrorPrediction = 0.54709201; TotalTime = 0.5328s; SamplesPerSecond = 17298.4 +MPI Rank 1: Async gradient aggregation wait time: 9e-06 +MPI Rank 1: Actual gradient aggregation time: 0.007444 +MPI Rank 1: Async gradient aggregation wait time: 1e-05 +MPI Rank 1: Actual gradient aggregation time: 0.007848 +MPI Rank 1: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.92530640; EvalErrorPrediction = 0.52812500; TotalTime = 0.5467s; SamplesPerSecond = 18730.9 +MPI Rank 1: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.958861; EvalErrorPrediction = 0.53725586; learningRatePerSample = 9.7656251e-05; EpochTime=1.12851 +MPI Rank 1: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:54: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:54: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.1373s; samplesPerSecond = 4662.7 -MPI Rank 1: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0955s; samplesPerSecond = 6702.9 -MPI Rank 1: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0955s; samplesPerSecond = 6704.2 -MPI Rank 1: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0957s; samplesPerSecond = 6685.5 -MPI Rank 1: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0958s; samplesPerSecond = 6681.9 -MPI Rank 1: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6685.5 -MPI Rank 1: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0957s; samplesPerSecond = 6684.7 -MPI Rank 1: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0957s; samplesPerSecond = 6690.0 -MPI Rank 1: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0958s; samplesPerSecond = 6677.2 -MPI Rank 1: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0957s; samplesPerSecond = 6690.9 -MPI Rank 1: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0954s; samplesPerSecond = 6709.6 -MPI Rank 1: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0955s; samplesPerSecond = 6701.1 -MPI Rank 1: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0956s; samplesPerSecond = 6697.6 -MPI Rank 1: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0955s; samplesPerSecond = 6702.9 -MPI Rank 1: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0956s; samplesPerSecond = 6693.2 -MPI Rank 1: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0957s; samplesPerSecond = 6684.8 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0956s; samplesPerSecond = 6695.5 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0957s; samplesPerSecond = 6687.2 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0955s; samplesPerSecond = 6704.4 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0954s; samplesPerSecond = 6708.5 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0954s; samplesPerSecond = 6710.1 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0954s; samplesPerSecond = 6708.8 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0957s; samplesPerSecond = 6685.3 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0957s; samplesPerSecond = 6687.7 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0955s; samplesPerSecond = 6700.7 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0954s; samplesPerSecond = 6705.9 -MPI Rank 1: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0978s; samplesPerSecond = 6545.5 -MPI Rank 1: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0950s; samplesPerSecond = 6739.2 -MPI Rank 1: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0950s; samplesPerSecond = 6737.7 -MPI Rank 1: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0950s; samplesPerSecond = 6738.9 -MPI Rank 1: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0949s; samplesPerSecond = 6744.6 -MPI Rank 1: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0950s; samplesPerSecond = 6739.0 -MPI Rank 1: 05/03/2016 18:17:57: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.14345s -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:57: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:57: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), 
BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.013894 -MPI Rank 1: Async gradient aggregation wait time: 0.004456 -MPI Rank 1: Actual gradient aggregation time: 0.009517 -MPI Rank 1: 05/03/2016 18:17:57: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08990053 * 2304; EvalErrorPrediction = 0.56640625 * 2304; time = 0.1436s; samplesPerSecond = 16043.8 -MPI Rank 1: Async gradient aggregation wait time: 0.003536 -MPI Rank 1: Actual gradient aggregation time: 0.015354 -MPI Rank 1: Async gradient aggregation wait time: 0.003722 -MPI Rank 1: Actual gradient aggregation time: 0.014315 -MPI Rank 1: 05/03/2016 18:17:57: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.16003887 * 2560; EvalErrorPrediction = 0.58476562 * 2560; time = 0.1381s; samplesPerSecond = 18540.2 -MPI Rank 1: Async gradient aggregation wait time: 0.003685 -MPI Rank 1: Actual gradient aggregation time: 0.013215 -MPI Rank 1: Async gradient aggregation wait time: 0.003692 -MPI Rank 1: Actual gradient aggregation time: 0.015446 -MPI Rank 1: 05/03/2016 18:17:57: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.19985756 * 2560; EvalErrorPrediction = 0.59492188 * 2560; time = 0.1433s; samplesPerSecond = 17865.4 -MPI Rank 1: Async gradient aggregation wait time: 0.005056 -MPI Rank 1: Actual gradient aggregation time: 0.014899 -MPI Rank 1: Async gradient aggregation wait time: 0.003638 -MPI Rank 1: Actual gradient aggregation time: 0.015239 -MPI Rank 1: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.12388714 * 2560; EvalErrorPrediction = 0.57968750 * 2560; time = 0.1491s; samplesPerSecond = 17173.4 -MPI Rank 1: Async gradient aggregation wait time: 0.003709 -MPI Rank 1: Actual gradient aggregation time: 0.015194 -MPI Rank 1: Async gradient aggregation wait time: 0.003629 -MPI Rank 1: Actual gradient aggregation time: 0.015065 -MPI Rank 1: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.05908444 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.1525s; samplesPerSecond = 16790.1 -MPI Rank 1: Async gradient aggregation wait time: 0.003577 -MPI Rank 1: Actual gradient aggregation time: 0.014749 -MPI Rank 1: Async gradient aggregation wait time: 0.00337 -MPI Rank 1: Actual gradient aggregation time: 0.015318 -MPI Rank 1: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.13603725 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.1515s; samplesPerSecond = 16895.1 -MPI Rank 1: Async gradient aggregation wait time: 0.004412 -MPI Rank 1: Actual gradient aggregation time: 0.014349 -MPI Rank 1: Async gradient aggregation wait time: 0.003659 -MPI Rank 1: Actual gradient aggregation time: 0.01547 -MPI Rank 1: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.09094421 * 2560; EvalErrorPrediction = 0.56406250 * 2560; time = 0.1505s; samplesPerSecond = 17006.0 -MPI Rank 1: Async gradient aggregation wait time: 0.003496 -MPI Rank 1: Actual gradient aggregation time: 0.015411 -MPI Rank 1: Async gradient aggregation wait time: 0.003639 -MPI Rank 1: Actual gradient aggregation time: 0.015077 -MPI Rank 1: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.02829896 * 2560; EvalErrorPrediction = 0.56210938 * 2560; time = 0.1495s; samplesPerSecond = 17119.2 -MPI Rank 1: Async gradient 
aggregation wait time: 0.006459 -MPI Rank 1: Actual gradient aggregation time: 0.006551 -MPI Rank 1: 05/03/2016 18:17:58: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10928672 * 20480; EvalErrorPrediction = 0.57392578 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.19347s -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:58: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:58: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.013751 -MPI Rank 1: Actual gradient aggregation time: 0.028508 -MPI Rank 1: Async gradient aggregation wait time: 0.003459 -MPI Rank 1: Actual gradient aggregation time: 0.029236 -MPI Rank 1: 05/03/2016 18:17:59: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.99429576 * 9216; EvalErrorPrediction = 0.54709201 * 9216; time = 0.2689s; samplesPerSecond = 34270.2 -MPI Rank 1: Async gradient aggregation wait time: 0.002465 -MPI Rank 1: Actual gradient aggregation time: 0.028459 -MPI Rank 1: Async gradient aggregation wait time: 0.000286 -MPI Rank 1: Actual gradient aggregation time: 0.015157 -MPI Rank 1: 05/03/2016 18:17:59: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.92530640 * 10240; EvalErrorPrediction = 0.52812500 * 10240; time = 0.2555s; samplesPerSecond = 40080.6 -MPI Rank 1: 05/03/2016 18:17:59: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.95886100 * 20480; EvalErrorPrediction = 0.53725586 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.543309s -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:59: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:17:59: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 1: Async gradient aggregation wait time: 0.004189 -MPI Rank 1: Actual gradient aggregation time: 0.03077 -MPI Rank 1: Async gradient aggregation wait time: 0.003386 -MPI Rank 1: Actual gradient aggregation time: 0.02757 -MPI Rank 1: 05/03/2016 18:17:59: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90011431 * 9216; EvalErrorPrediction = 0.51725260 * 9216; time = 0.2711s; samplesPerSecond = 33998.6 -MPI Rank 1: Async gradient aggregation wait time: 0.003756 -MPI Rank 1: Actual gradient aggregation time: 0.029704 -MPI Rank 1: Async gradient aggregation wait time: 0.000726 -MPI Rank 1: Actual gradient aggregation time: 0.023982 -MPI Rank 1: 05/03/2016 18:17:59: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.88429973 * 10240; EvalErrorPrediction = 0.52099609 * 10240; time = 0.2741s; samplesPerSecond = 37354.4 -MPI Rank 1: Async gradient aggregation wait time: 0.006279 -MPI Rank 1: 05/03/2016 18:17:59: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.89248911 * 20480; EvalErrorPrediction = 0.51933594 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=0.561545s -MPI Rank 1: 05/03/2016 18:18:00: CNTKCommandTrainEnd: speechTrain -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:18:00: Action "train" complete. -MPI Rank 1: -MPI Rank 1: 05/03/2016 18:18:00: __COMPLETED__ -MPI Rank 2: 05/03/2016 18:17:51: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 18:17:51: Build info: +MPI Rank 1: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Async gradient aggregation wait time: 6e-06 +MPI Rank 1: Actual gradient aggregation time: 0.038811 +MPI Rank 1: Async gradient aggregation wait time: 7e-06 +MPI Rank 1: Actual gradient aggregation time: 0.034876 +MPI Rank 1: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 1.90011431; EvalErrorPrediction = 0.51725260; TotalTime = 0.5304s; SamplesPerSecond = 17374.8 +MPI Rank 1: Async gradient aggregation wait time: 1e-05 +MPI Rank 1: Actual gradient aggregation time: 0.006991 +MPI Rank 1: Async gradient aggregation wait time: 1.4e-05 +MPI Rank 1: Actual gradient aggregation time: 0.006682 +MPI Rank 1: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.88429973; EvalErrorPrediction = 0.52099609; TotalTime = 0.5395s; SamplesPerSecond = 18980.1 +MPI Rank 1: Async gradient aggregation wait time: 0.004311 +MPI Rank 1: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.89248911; EvalErrorPrediction = 0.51933594; learningRatePerSample = 9.7656251e-05; EpochTime=1.10764 +MPI Rank 1: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: __COMPLETED__ +MPI Rank 1: ~MPIWrapper +MPI Rank 2: ------------------------------------------------------------------- +MPI Rank 2: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: Built time: May 3 2016 17:56:15 -MPI Rank 2: 05/03/2016 18:17:51: Last modified date: Tue May 3 11:36:22 2016 -MPI Rank 2: 05/03/2016 18:17:51: Build type: release -MPI Rank 2: 05/03/2016 18:17:51: Build target: GPU -MPI Rank 2: 05/03/2016 18:17:51: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 18:17:51: Math lib: acml -MPI Rank 2: 05/03/2016 18:17:51: CUDA_PATH: /usr/local/cuda-7.5 -MPI Rank 2: 05/03/2016 18:17:51: CUB_PATH: /usr/local/cub-1.4.1 -MPI Rank 2: 05/03/2016 18:17:51: CUDNN_PATH: /usr/local/cudnn-4.0 -MPI Rank 2: 
05/03/2016 18:17:51: Build Branch: HEAD -MPI Rank 2: 05/03/2016 18:17:51: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -MPI Rank 2: 05/03/2016 18:17:51: Built by philly on 18750d26eb32 -MPI Rank 2: 05/03/2016 18:17:51: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -MPI Rank 2: 05/03/2016 18:17:51: ------------------------------------------------------------------- +MPI Rank 2: Built time: Jan 6 2016 19:01:02 +MPI Rank 2: Last modified date: Tue Jan 5 10:37:19 2016 +MPI Rank 2: Build type: debug +MPI Rank 2: Math lib: acml +MPI Rank 2: CUDA_PATH: /usr/local/cuda-7.0 +MPI Rank 2: CUB_PATH: /usr/local/cub-1.4.1 +MPI Rank 2: Build Branch: master +MPI Rank 2: Build SHA1: f88156c7f48e6418e0e5e2998e159c54aaca3c1d +MPI Rank 2: ------------------------------------------------------------------- +MPI Rank 2: running on localhost at 2016/01/06 23:25:25 +MPI Rank 2: command line: +MPI Rank 2: /home/mluser/src/cplx_master/build/debug/bin/cntk configFile=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../cntk.config currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. DeviceId=0 numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: Running on localhost at 2016/05/03 18:17:51 -MPI Rank 2: 05/03/2016 18:17:51: Command line: -MPI Rank 2: /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] 
stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:17:51: precision = "float" +MPI Rank 2: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1358,7 +1796,7 @@ MPI Rank 2: CE = if trainingCriterion == 'CE' MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 2: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 2: logPrior = LogPrior(labels) MPI Rank 2: // TODO: how to add a tag to an infix operation? @@ -1409,34 +1847,30 @@ MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu +MPI Rank 2: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
MPI Rank 2: DeviceId=0 -MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] -MPI Rank 2: numCPUThreads=8 +MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 18:17:51: precision = "float" +MPI Rank 2: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = 0 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1468,7 +1902,7 @@ MPI Rank 2: CE = if trainingCriterion == 'CE' MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 2: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 2: logPrior = LogPrior(labels) MPI Rank 2: // TODO: how to add a tag to an infix operation? 
@@ -1512,47 +1946,42 @@ MPI Rank 2: type = "real" MPI Rank 2: scpFile = "glob_0000.scp" MPI Rank 2: ] MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 2: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 2: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 2: OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu +MPI Rank 2: DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
MPI Rank 2: DeviceId=0 -MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] -MPI Rank 2: numCPUThreads=8 +MPI Rank 2: numCPUThreads=2 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/../../../DNN -MPI Rank 2: configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -MPI Rank 2: configparameters: cntk.cntk:deviceId=0 -MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ +MPI Rank 2: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: configparameters: cntk.config:command=speechTrain +MPI Rank 2: configparameters: cntk.config:ConfigDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/.. 
+MPI Rank 2: configparameters: cntk.config:currentDirectory=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: configparameters: cntk.config:DataDir=/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data +MPI Rank 2: configparameters: cntk.config:deviceId=0 +MPI Rank 2: configparameters: cntk.config:numCPUThreads=2 +MPI Rank 2: configparameters: cntk.config:parallelTrain=true +MPI Rank 2: configparameters: cntk.config:precision=double +MPI Rank 2: configparameters: cntk.config:RunDir=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu +MPI Rank 2: configparameters: cntk.config:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1584,7 +2013,7 @@ MPI Rank 2: CE = if trainingCriterion == 'CE' MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ErrorPrediction(labels, outZ, tag='evaluation') +MPI Rank 2: ErrorPrediction(labels, outZ, tag='eval') MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) MPI Rank 2: logPrior = LogPrior(labels) MPI Rank 2: // TODO: how to add a tag to an infix operation? @@ -1628,262 +2057,543 @@ MPI Rank 2: type = "real" MPI Rank 2: scpFile = "glob_0000.scp" MPI Rank 2: ] MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list" +MPI Rank 2: mlfFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf" +MPI Rank 2: labelMappingFile = "/home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list" MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 18:17:51: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 18:17:51: Commands: speechTrain -MPI Rank 2: 05/03/2016 18:17:51: Precision = "double" -MPI Rank 2: 05/03/2016 18:17:51: Using 8 CPU threads. 
-MPI Rank 2: 05/03/2016 18:17:51: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 18:17:51: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 2: 05/03/2016 18:17:51: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: ############################################################################## -MPI Rank 2: 05/03/2016 18:17:51: # # -MPI Rank 2: 05/03/2016 18:17:51: # Action "train" # -MPI Rank 2: 05/03/2016 18:17:51: # # -MPI Rank 2: 05/03/2016 18:17:51: ############################################################################## -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: configparameters: cntk.config:stderr=/tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/stderr +MPI Rank 2: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: command: speechTrain +MPI Rank 2: precision = double +MPI Rank 2: Using 2 CPU threads +MPI Rank 2: CNTKModelPath: /tmp/cntk-test-20160106232524.94334/Speech/DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@debug_gpu/models/cntkSpeech.dnn +MPI Rank 2: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 2: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 2: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 2: total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list -MPI Rank 2: htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: Creating virgin network. +MPI Rank 2: reading script file glob_0000.scp ... 948 entries +MPI Rank 2: total 132 state names in state list /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/state.list +MPI Rank 2: htkmlfreader: reading MLF file /home/mluser/src/cplx_master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 2: MPI Rank 2: Post-processing network... 
MPI Rank 2: MPI Rank 2: 7 roots: -MPI Rank 2: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 2: EvalErrorPrediction = ErrorPrediction() -MPI Rank 2: InvStdOfFeatures = InvStdDev() -MPI Rank 2: MeanOfFeatures = Mean() -MPI Rank 2: PosteriorProb = Softmax() -MPI Rank 2: Prior = Mean() -MPI Rank 2: ScaledLogLikelihood = Minus() -MPI Rank 2: -MPI Rank 2: Validating network. 25 nodes to process in pass 1. -MPI Rank 2: -MPI Rank 2: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 2: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 2: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 2: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 2: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 2: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 2: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 2: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 2: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 2: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 2: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 2: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 2: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 2: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 2: Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 2: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 2: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 2: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 2: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 2: -MPI Rank 2: Validating network. 17 nodes to process in pass 2. 
+MPI Rank 2: MeanOfFeatures = Mean +MPI Rank 2: InvStdOfFeatures = InvStdDev +MPI Rank 2: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: Prior = Mean +MPI Rank 2: ScaledLogLikelihood = Minus +MPI Rank 2: PosteriorProb = Softmax +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for MeanOfFeatures Mean operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for InvStdOfFeatures InvStdDev operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for CrossEntropyWithSoftmax CrossEntropyWithSoftmax operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for EvalErrorPrediction ErrorPrediction operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for Prior Mean operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for ScaledLogLikelihood Minus operation +MPI Rank 2: FormNestedNetwork: WARNING: Was called twice for PosteriorProb Softmax operation MPI Rank 2: MPI Rank 2: -MPI Rank 2: Validating network, final pass. +MPI Rank 2: Validating for node MeanOfFeatures. 2 nodes to process in pass 1. +MPI Rank 2: +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node MeanOfFeatures. 1 nodes to process in pass 2. +MPI Rank 2: +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node MeanOfFeatures, final verification. +MPI Rank 2: +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: 1 out of 2 nodes do not share the minibatch layout with the input data. MPI Rank 2: MPI Rank 2: +MPI Rank 2: Validating for node InvStdOfFeatures. 2 nodes to process in pass 1. MPI Rank 2: -MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input data. +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node InvStdOfFeatures. 1 nodes to process in pass 2. +MPI Rank 2: +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node InvStdOfFeatures, final verification. +MPI Rank 2: +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: +MPI Rank 2: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 2: +MPI Rank 2: +MPI Rank 2: Validating for node CrossEntropyWithSoftmax. 20 nodes to process in pass 1. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node CrossEntropyWithSoftmax. 10 nodes to process in pass 2. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node CrossEntropyWithSoftmax, final verification. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 2: +MPI Rank 2: +MPI Rank 2: Validating for node EvalErrorPrediction. 20 nodes to process in pass 1. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node EvalErrorPrediction. 9 nodes to process in pass 2. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node EvalErrorPrediction, final verification. 
+MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> EvalErrorPrediction = ErrorPrediction(labels[132, MBSize 0], HLast[132, MBSize 0]) -> [1 [1 {1}], 1] +MPI Rank 2: +MPI Rank 2: 9 out of 20 nodes do not share the minibatch layout with the input data. +MPI Rank 2: +MPI Rank 2: +MPI Rank 2: Validating for node Prior. 2 nodes to process in pass 1. +MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node Prior. 1 nodes to process in pass 2. +MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: +MPI Rank 2: Validating for node Prior, final verification. +MPI Rank 2: +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: +MPI Rank 2: 1 out of 2 nodes do not share the minibatch layout with the input data. +MPI Rank 2: +MPI Rank 2: +MPI Rank 2: Validating for node ScaledLogLikelihood. 22 nodes to process in pass 1. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: Validating for node ScaledLogLikelihood. 10 nodes to process in pass 2. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: Validating for node ScaledLogLikelihood, final verification. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> labels = InputValue -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> Prior = Mean(labels[132, MBSize 0]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> LogOfPrior = Log(Prior[132, 1]) -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> ScaledLogLikelihood = Minus(HLast[132, MBSize 0], LogOfPrior[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: 10 out of 22 nodes do not share the minibatch layout with the input data. +MPI Rank 2: +MPI Rank 2: +MPI Rank 2: Validating for node PosteriorProb. 19 nodes to process in pass 1. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: Validating for node PosteriorProb. 9 nodes to process in pass 2. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: Validating for node PosteriorProb, final verification. 
+MPI Rank 2: +MPI Rank 2: Validating --> W2 = LearnableParameter -> [132 [132 {1}], 512] +MPI Rank 2: Validating --> W1 = LearnableParameter -> [512 [512 {1}], 512] +MPI Rank 2: Validating --> W0 = LearnableParameter -> [512 [512 {1}], 363] +MPI Rank 2: Validating --> features = InputValue -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> MeanOfFeatures = Mean(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev(features[363, MBSize 0]) -> [363 [363 {1}], 1] +MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization(features[363, MBSize 0], MeanOfFeatures[363, 1], InvStdOfFeatures[363, 1]) -> [363 [363 {1}], MBSize 0] +MPI Rank 2: Validating --> W0*features = Times(W0[512, 363], MVNormalizedFeatures[363, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B0 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W0*features+B0 = Plus(W0*features[512, MBSize 0], B0[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H1 = Sigmoid(W0*features+B0[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W1*H1 = Times(W1[512, 512], H1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> B1 = LearnableParameter -> [512 [512 {1}], 1] +MPI Rank 2: Validating --> W1*H1+B1 = Plus(W1*H1[512, MBSize 0], B1[512, 1]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> H2 = Sigmoid(W1*H1+B1[512, MBSize 0]) -> [512 [512 {1}], MBSize 0] +MPI Rank 2: Validating --> W2*H1 = Times(W2[132, 512], H2[512, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> B2 = LearnableParameter -> [132 [132 {1}], 1] +MPI Rank 2: Validating --> HLast = Plus(W2*H1[132, MBSize 0], B2[132, 1]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: Validating --> PosteriorProb = Softmax(HLast[132, MBSize 0]) -> [132 [132 {1}], MBSize 0] +MPI Rank 2: +MPI Rank 2: 8 out of 19 nodes do not share the minibatch layout with the input data. MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: Created model with 25 nodes on GPU 0. +MPI Rank 2: SGD using GPU 0. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: Training criterion node(s): -MPI Rank 2: 05/03/2016 18:17:51: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: Training criterion node(s): +MPI Rank 2: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: Evaluation criterion node(s): -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: Evaluation criterion node(s): +MPI Rank 2: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. MPI Rank 2: -MPI Rank 2: Memory Sharing Structure: +MPI Rank 2: Precomputing --> 3 PreCompute nodes found. 
MPI Rank 2: -MPI Rank 2: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0x2ca7648: {[features Value[363 x *]] } -MPI Rank 2: 0x395f668: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 0x395fac8: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 0x3960818: {[W0 Value[512 x 363]] } -MPI Rank 2: 0x3a88f68: {[W1 Value[512 x 512]] } -MPI Rank 2: 0x3a89d38: {[B1 Value[512 x 1]] } -MPI Rank 2: 0x3a8aed8: {[W2 Value[132 x 512]] } -MPI Rank 2: 0x3a8bb88: {[B2 Value[132 x 1]] } -MPI Rank 2: 0x3a8c9b8: {[labels Value[132 x *]] } -MPI Rank 2: 0x3a8dc18: {[Prior Value[132]] } -MPI Rank 2: 0x3a934b8: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 0x3a937b8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 0x3a93978: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 0x3a93e08: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 0x3a93f78: {[LogOfPrior Value[132]] } -MPI Rank 2: 0x3a99578: {[B0 Value[512 x 1]] } -MPI Rank 2: 0x3d74c58: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 0x3d75418: {[W0*features Value[512 x *]] } -MPI Rank 2: 0x3d75628: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 0x3d75788: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 0x3d758e8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 0x3d75aa8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 0x3d75c68: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 0x3d75e28: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 0x3d76988: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 0x3d76b48: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 0x3d76d08: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 0x3d76ec8: {[B2 Gradient[132 x 1]] } +MPI Rank 2: NodeName: InvStdOfFeatures +MPI Rank 2: NodeName: MeanOfFeatures +MPI Rank 2: NodeName: Prior +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: +MPI Rank 2: Precomputing --> Completed. MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: Set Max Temp Mem Size For Convolution Nodes to 0 samples. +MPI Rank 2: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:51: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 18:17:51: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 18:17:51: Prior = Mean() +MPI Rank 2: Starting minibatch loop. 
+MPI Rank 2: #PLUS# +MPI Rank 2: Tensor Op: Op 15: 512 x 64 {1,512} op 512 x 1 {1,512} -> 512 x 64 {1,512} +MPI Rank 2: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 2: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 2: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 2: 8 procs 32 warps 2147483647 65535 65535 max grid on GeForce GTX 690 +MPI Rank 2: #NLop5# +MPI Rank 2: Tensor Op: Op 5: 512 x 64 {1,512} -> 512 x 64 {1,512} +MPI Rank 2: #PLUSBP# +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: * 640; CrossEntropyWithSoftmax = 4.40318406; EvalErrorPrediction = 0.90468750; TotalTime = 0.2561s; SamplesPerSecond = 2498.7 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: * 640; CrossEntropyWithSoftmax = 4.15980357; EvalErrorPrediction = 0.87187500; TotalTime = 0.2183s; SamplesPerSecond = 2931.8 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: * 640; CrossEntropyWithSoftmax = 3.98424210; EvalErrorPrediction = 0.87812500; TotalTime = 0.2137s; SamplesPerSecond = 2995.0 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: * 640; CrossEntropyWithSoftmax = 3.86209050; EvalErrorPrediction = 0.87656250; TotalTime = 0.2167s; SamplesPerSecond = 2953.7 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: * 640; CrossEntropyWithSoftmax = 3.80597620; EvalErrorPrediction = 0.88593750; TotalTime = 0.2161s; SamplesPerSecond = 2961.6 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: * 640; CrossEntropyWithSoftmax = 3.73511552; EvalErrorPrediction = 0.87812500; TotalTime = 0.2156s; SamplesPerSecond = 2968.3 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: * 640; CrossEntropyWithSoftmax = 3.57260725; EvalErrorPrediction = 0.81875000; TotalTime = 0.2176s; SamplesPerSecond = 2941.8 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: * 640; CrossEntropyWithSoftmax = 3.42293687; EvalErrorPrediction = 0.80468750; TotalTime = 0.2178s; SamplesPerSecond = 2938.4 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: * 640; CrossEntropyWithSoftmax = 3.34304309; EvalErrorPrediction = 0.76718750; TotalTime = 0.2170s; SamplesPerSecond = 2949.8 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: * 640; CrossEntropyWithSoftmax = 3.37037793; EvalErrorPrediction = 0.84687500; TotalTime = 0.2165s; SamplesPerSecond = 2956.2 +MPI Rank 2: WARNING: The same matrix with dim [1, 1] has been transferred between different devices for 20 times. 
+MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: * 640; CrossEntropyWithSoftmax = 3.21606065; EvalErrorPrediction = 0.76093750; TotalTime = 0.2197s; SamplesPerSecond = 2913.4 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: * 640; CrossEntropyWithSoftmax = 3.31610118; EvalErrorPrediction = 0.78437500; TotalTime = 0.2169s; SamplesPerSecond = 2950.7 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: * 640; CrossEntropyWithSoftmax = 3.14285888; EvalErrorPrediction = 0.75000000; TotalTime = 0.2173s; SamplesPerSecond = 2945.5 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: * 640; CrossEntropyWithSoftmax = 3.01821991; EvalErrorPrediction = 0.70937500; TotalTime = 0.2169s; SamplesPerSecond = 2950.2 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: * 640; CrossEntropyWithSoftmax = 3.01218944; EvalErrorPrediction = 0.73906250; TotalTime = 0.2186s; SamplesPerSecond = 2927.3 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: * 640; CrossEntropyWithSoftmax = 2.98947652; EvalErrorPrediction = 0.73593750; TotalTime = 0.2173s; SamplesPerSecond = 2945.7 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: * 640; CrossEntropyWithSoftmax = 2.86297716; EvalErrorPrediction = 0.70000000; TotalTime = 0.2175s; SamplesPerSecond = 2942.8 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: * 640; CrossEntropyWithSoftmax = 2.71901077; EvalErrorPrediction = 0.68593750; TotalTime = 0.2162s; SamplesPerSecond = 2960.5 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: * 640; CrossEntropyWithSoftmax = 2.80860596; EvalErrorPrediction = 0.71250000; TotalTime = 0.2183s; SamplesPerSecond = 2931.7 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: * 640; CrossEntropyWithSoftmax = 2.60590434; EvalErrorPrediction = 0.64687500; TotalTime = 0.2168s; SamplesPerSecond = 2952.6 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: * 640; CrossEntropyWithSoftmax = 2.63920069; EvalErrorPrediction = 0.66875000; TotalTime = 0.2177s; SamplesPerSecond = 2940.2 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: * 640; CrossEntropyWithSoftmax = 2.58372597; EvalErrorPrediction = 0.65781250; TotalTime = 0.2172s; SamplesPerSecond = 2947.1 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: * 640; CrossEntropyWithSoftmax = 2.50997096; EvalErrorPrediction = 0.62031250; TotalTime = 0.2183s; SamplesPerSecond = 2931.9 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: * 640; CrossEntropyWithSoftmax = 2.42126950; EvalErrorPrediction = 0.62968750; TotalTime = 0.2139s; SamplesPerSecond = 2992.6 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: * 640; CrossEntropyWithSoftmax = 2.40125789; EvalErrorPrediction = 0.65156250; TotalTime = 0.2068s; SamplesPerSecond = 3094.5 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: * 640; CrossEntropyWithSoftmax = 2.47110816; EvalErrorPrediction = 0.63281250; TotalTime = 0.2183s; SamplesPerSecond = 2931.6 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: * 640; CrossEntropyWithSoftmax = 2.33215267; EvalErrorPrediction = 0.60312500; TotalTime = 0.2161s; SamplesPerSecond = 2961.1 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: * 640; CrossEntropyWithSoftmax = 2.21936103; EvalErrorPrediction = 0.56875000; TotalTime = 0.2164s; SamplesPerSecond = 2957.8 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: * 640; CrossEntropyWithSoftmax = 2.31959580; EvalErrorPrediction = 0.61093750; TotalTime = 0.2157s; SamplesPerSecond = 2967.1 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: 
* 640; CrossEntropyWithSoftmax = 2.19592881; EvalErrorPrediction = 0.61718750; TotalTime = 0.2168s; SamplesPerSecond = 2951.5 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: * 640; CrossEntropyWithSoftmax = 2.28411654; EvalErrorPrediction = 0.60000000; TotalTime = 0.2191s; SamplesPerSecond = 2920.6 +MPI Rank 2: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: * 640; CrossEntropyWithSoftmax = 2.18307184; EvalErrorPrediction = 0.55781250; TotalTime = 0.2044s; SamplesPerSecond = 3130.7 +MPI Rank 2: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568; EvalErrorPrediction = 0.72426758; learningRatePerSample = 0.015625; EpochTime=7.00462 +MPI Rank 2: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:54: Precomputing --> Completed. +MPI Rank 2: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Actual gradient aggregation time: 0.022031 +MPI Rank 2: Async gradient aggregation wait time: 0.007427 +MPI Rank 2: Actual gradient aggregation time: 0.033122 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: * 2304; CrossEntropyWithSoftmax = 2.08990053; EvalErrorPrediction = 0.56640625; TotalTime = 0.2872s; SamplesPerSecond = 8022.4 +MPI Rank 2: Async gradient aggregation wait time: 0.008906 +MPI Rank 2: Actual gradient aggregation time: 0.023597 +MPI Rank 2: Async gradient aggregation wait time: 0.007731 +MPI Rank 2: Actual gradient aggregation time: 0.031038 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: * 2560; CrossEntropyWithSoftmax = 2.16003887; EvalErrorPrediction = 0.58476562; TotalTime = 0.2858s; SamplesPerSecond = 8957.9 +MPI Rank 2: Async gradient aggregation wait time: 0.010559 +MPI Rank 2: Actual gradient aggregation time: 0.022847 +MPI Rank 2: Async gradient aggregation wait time: 0.007715 +MPI Rank 2: Actual gradient aggregation time: 0.01345 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: * 2560; CrossEntropyWithSoftmax = 2.19985756; EvalErrorPrediction = 0.59492188; TotalTime = 0.2764s; SamplesPerSecond = 9260.6 +MPI Rank 2: Async gradient aggregation wait time: 0.009341 +MPI Rank 2: Actual gradient aggregation time: 0.015912 +MPI Rank 2: Async gradient aggregation wait time: 0.00736 +MPI Rank 2: Actual gradient aggregation time: 0.026809 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: * 2560; CrossEntropyWithSoftmax = 2.12388714; EvalErrorPrediction = 0.57968750; TotalTime = 0.2787s; SamplesPerSecond = 9184.7 +MPI Rank 2: Async gradient aggregation wait time: 0.000492 +MPI Rank 2: Actual gradient aggregation time: 0.040623 +MPI Rank 2: Async gradient aggregation wait time: 0.006849 +MPI Rank 2: Actual gradient aggregation time: 0.006687 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: * 2560; CrossEntropyWithSoftmax = 2.05908444; EvalErrorPrediction = 0.57070312; TotalTime = 0.2835s; SamplesPerSecond = 9029.1 +MPI Rank 2: Async gradient aggregation wait time: 0.006498 +MPI Rank 2: Actual gradient aggregation time: 0.033143 +MPI Rank 2: Async gradient aggregation wait time: 0.009191 +MPI Rank 2: Actual gradient aggregation time: 0.019724 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: * 2560; CrossEntropyWithSoftmax = 2.13603725; 
EvalErrorPrediction = 0.57070312; TotalTime = 0.2989s; SamplesPerSecond = 8563.6 +MPI Rank 2: Async gradient aggregation wait time: 0.005047 +MPI Rank 2: Actual gradient aggregation time: 0.006069 +MPI Rank 2: Async gradient aggregation wait time: 0.008467 +MPI Rank 2: Actual gradient aggregation time: 0.023633 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: * 2560; CrossEntropyWithSoftmax = 2.09094421; EvalErrorPrediction = 0.56406250; TotalTime = 0.2856s; SamplesPerSecond = 8964.4 +MPI Rank 2: Async gradient aggregation wait time: 0.018896 +MPI Rank 2: Actual gradient aggregation time: 0.010287 +MPI Rank 2: Async gradient aggregation wait time: 0.006593 +MPI Rank 2: Actual gradient aggregation time: 0.029012 +MPI Rank 2: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: * 2560; CrossEntropyWithSoftmax = 2.02829896; EvalErrorPrediction = 0.56210938; TotalTime = 0.3084s; SamplesPerSecond = 8302.1 +MPI Rank 2: Async gradient aggregation wait time: 0.00403 +MPI Rank 2: Actual gradient aggregation time: 0.017647 +MPI Rank 2: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10928672; EvalErrorPrediction = 0.57392578; learningRatePerSample = 0.001953125; EpochTime=2.33332 +MPI Rank 2: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: +MPI Rank 2: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Async gradient aggregation wait time: 0.007131 +MPI Rank 2: Actual gradient aggregation time: 0.049529 +MPI Rank 2: Async gradient aggregation wait time: 0.007958 +MPI Rank 2: Actual gradient aggregation time: 0.051621 +MPI Rank 2: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 1.99429576; EvalErrorPrediction = 0.54709201; TotalTime = 0.5153s; SamplesPerSecond = 17884.0 +MPI Rank 2: Async gradient aggregation wait time: 0.00777 +MPI Rank 2: Actual gradient aggregation time: 0.056468 +MPI Rank 2: Async gradient aggregation wait time: 0.001599 +MPI Rank 2: Actual gradient aggregation time: 0.0501 +MPI Rank 2: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.92530640; EvalErrorPrediction = 0.52812500; TotalTime = 0.5538s; SamplesPerSecond = 18490.0 +MPI Rank 2: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.958861; EvalErrorPrediction = 0.53725586; learningRatePerSample = 9.7656251e-05; EpochTime=1.12889 +MPI Rank 2: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:54: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:54: Starting minibatch loop. 
-MPI Rank 2: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.40318406 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.1911s; samplesPerSecond = 3348.3 -MPI Rank 2: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15980357 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0954s; samplesPerSecond = 6708.2 -MPI Rank 2: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98424210 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6689.6 -MPI Rank 2: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86209050 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0957s; samplesPerSecond = 6685.3 -MPI Rank 2: 05/03/2016 18:17:54: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80597620 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0958s; samplesPerSecond = 6683.2 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73511552 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0957s; samplesPerSecond = 6686.0 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57260725 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0957s; samplesPerSecond = 6688.1 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42293687 * 640; EvalErrorPrediction = 0.80468750 * 640; time = 0.0957s; samplesPerSecond = 6685.1 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.34304309 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.0958s; samplesPerSecond = 6679.5 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.37037793 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0946s; samplesPerSecond = 6762.0 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21606065 * 640; EvalErrorPrediction = 0.76093750 * 640; time = 0.0954s; samplesPerSecond = 6709.6 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31610118 * 640; EvalErrorPrediction = 0.78437500 * 640; time = 0.0956s; samplesPerSecond = 6694.8 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14285888 * 640; EvalErrorPrediction = 0.75000000 * 640; time = 0.0955s; samplesPerSecond = 6703.1 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.01821991 * 640; EvalErrorPrediction = 0.70937500 * 640; time = 0.0964s; samplesPerSecond = 6640.1 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.01218944 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0957s; samplesPerSecond = 6688.1 -MPI Rank 2: 05/03/2016 18:17:55: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.98947652 * 640; EvalErrorPrediction = 0.73593750 * 640; time = 0.0957s; samplesPerSecond = 6690.9 -MPI Rank 2: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.86297716 * 640; EvalErrorPrediction = 0.70000000 * 640; time = 0.0957s; samplesPerSecond = 6690.0 -MPI Rank 2: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.71901077 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.0947s; samplesPerSecond = 6758.0 -MPI Rank 2: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80860596 * 640; EvalErrorPrediction = 0.71250000 * 640; time = 0.0954s; samplesPerSecond = 6705.6 -MPI Rank 2: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.60590434 * 640; EvalErrorPrediction = 0.64687500 * 640; time = 0.0954s; samplesPerSecond = 6710.5 -MPI Rank 2: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.63920069 * 640; EvalErrorPrediction = 0.66875000 * 640; time = 0.0954s; samplesPerSecond = 6710.5 -MPI Rank 2: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.58372597 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0964s; samplesPerSecond = 6636.5 -MPI Rank 2: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.50997096 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0957s; samplesPerSecond = 6689.2 -MPI Rank 2: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.42126950 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0947s; samplesPerSecond = 6754.7 -MPI Rank 2: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.40125789 * 640; EvalErrorPrediction = 0.65156250 * 640; time = 0.0955s; samplesPerSecond = 6704.1 -MPI Rank 2: 05/03/2016 18:17:56: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.47110816 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0954s; samplesPerSecond = 6708.2 -MPI Rank 2: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.33215267 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.0939s; samplesPerSecond = 6815.1 -MPI Rank 2: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.21936103 * 640; EvalErrorPrediction = 0.56875000 * 640; time = 0.0950s; samplesPerSecond = 6739.1 -MPI Rank 2: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.31959580 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0950s; samplesPerSecond = 6739.2 -MPI Rank 2: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.19592881 * 640; EvalErrorPrediction = 0.61718750 * 640; time = 0.0950s; samplesPerSecond = 6740.0 -MPI Rank 2: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.28411654 * 640; EvalErrorPrediction = 0.60000000 * 640; time = 0.0949s; samplesPerSecond = 6743.9 -MPI Rank 2: 05/03/2016 18:17:57: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18307184 * 640; EvalErrorPrediction = 0.55781250 * 640; time = 0.0786s; samplesPerSecond = 8140.7 -MPI Rank 2: 05/03/2016 18:17:57: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 2.99723568 * 20480; EvalErrorPrediction = 0.72426758 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.17959s -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:57: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:57: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), 
BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.017695 -MPI Rank 2: Async gradient aggregation wait time: 0.000815 -MPI Rank 2: Actual gradient aggregation time: 0.008493 -MPI Rank 2: 05/03/2016 18:17:57: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08990053 * 2304; EvalErrorPrediction = 0.56640625 * 2304; time = 0.1337s; samplesPerSecond = 17237.5 -MPI Rank 2: Async gradient aggregation wait time: 0.010198 -MPI Rank 2: Actual gradient aggregation time: 0.015352 -MPI Rank 2: Async gradient aggregation wait time: 0.004744 -MPI Rank 2: Actual gradient aggregation time: 0.01425 -MPI Rank 2: 05/03/2016 18:17:57: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.16003887 * 2560; EvalErrorPrediction = 0.58476562 * 2560; time = 0.1483s; samplesPerSecond = 17262.9 -MPI Rank 2: Async gradient aggregation wait time: 0.005291 -MPI Rank 2: Actual gradient aggregation time: 0.011922 -MPI Rank 2: Async gradient aggregation wait time: 0.005256 -MPI Rank 2: Actual gradient aggregation time: 0.015381 -MPI Rank 2: 05/03/2016 18:17:57: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.19985756 * 2560; EvalErrorPrediction = 0.59492188 * 2560; time = 0.1433s; samplesPerSecond = 17866.0 -MPI Rank 2: Async gradient aggregation wait time: 0.005076 -MPI Rank 2: Actual gradient aggregation time: 0.014955 -MPI Rank 2: Async gradient aggregation wait time: 0.005206 -MPI Rank 2: Actual gradient aggregation time: 0.015138 -MPI Rank 2: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.12388714 * 2560; EvalErrorPrediction = 0.57968750 * 2560; time = 0.1490s; samplesPerSecond = 17186.4 -MPI Rank 2: Async gradient aggregation wait time: 0.00572 -MPI Rank 2: Actual gradient aggregation time: 0.015121 -MPI Rank 2: Async gradient aggregation wait time: 0.005176 -MPI Rank 2: Actual gradient aggregation time: 0.015058 -MPI Rank 2: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.05908444 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.1525s; samplesPerSecond = 16783.3 -MPI Rank 2: Async gradient aggregation wait time: 0.004624 -MPI Rank 2: Actual gradient aggregation time: 0.014696 -MPI Rank 2: Async gradient aggregation wait time: 0.00439 -MPI Rank 2: Actual gradient aggregation time: 0.01521 -MPI Rank 2: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.13603725 * 2560; EvalErrorPrediction = 0.57070312 * 2560; time = 0.1515s; samplesPerSecond = 16896.0 -MPI Rank 2: Async gradient aggregation wait time: 0.004575 -MPI Rank 2: Actual gradient aggregation time: 0.015683 -MPI Rank 2: Async gradient aggregation wait time: 0.00521 -MPI Rank 2: Actual gradient aggregation time: 0.015371 -MPI Rank 2: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.09094421 * 2560; EvalErrorPrediction = 0.56406250 * 2560; time = 0.1505s; samplesPerSecond = 17013.9 -MPI Rank 2: Async gradient aggregation wait time: 0.005028 -MPI Rank 2: Actual gradient aggregation time: 0.015318 -MPI Rank 2: Async gradient aggregation wait time: 0.005205 -MPI Rank 2: Actual gradient aggregation time: 0.015055 -MPI Rank 2: 05/03/2016 18:17:58: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.02829896 * 2560; EvalErrorPrediction = 0.56210938 * 2560; time = 0.1495s; samplesPerSecond = 17128.3 -MPI Rank 2: Async gradient 
aggregation wait time: 0.006398 -MPI Rank 2: Actual gradient aggregation time: 0.006532 -MPI Rank 2: 05/03/2016 18:17:58: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10928672 * 20480; EvalErrorPrediction = 0.57392578 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.19376s -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:58: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:58: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.024235 -MPI Rank 2: Actual gradient aggregation time: 0.028554 -MPI Rank 2: Async gradient aggregation wait time: 0.007245 -MPI Rank 2: Actual gradient aggregation time: 0.029245 -MPI Rank 2: 05/03/2016 18:17:59: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.99429576 * 9216; EvalErrorPrediction = 0.54709201 * 9216; time = 0.2692s; samplesPerSecond = 34239.7 -MPI Rank 2: Async gradient aggregation wait time: 0.002789 -MPI Rank 2: Actual gradient aggregation time: 0.028375 -MPI Rank 2: Async gradient aggregation wait time: 0.005983 -MPI Rank 2: Actual gradient aggregation time: 0.024917 -MPI Rank 2: 05/03/2016 18:17:59: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.92530640 * 10240; EvalErrorPrediction = 0.52812500 * 10240; time = 0.2555s; samplesPerSecond = 40073.7 -MPI Rank 2: 05/03/2016 18:17:59: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.95886100 * 20480; EvalErrorPrediction = 0.53725586 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.543637s -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:59: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:59: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 2: Async gradient aggregation wait time: 0.013656 -MPI Rank 2: Actual gradient aggregation time: 0.030699 -MPI Rank 2: Async gradient aggregation wait time: 0.000605 -MPI Rank 2: Actual gradient aggregation time: 0.024596 -MPI Rank 2: 05/03/2016 18:17:59: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90011431 * 9216; EvalErrorPrediction = 0.51725260 * 9216; time = 0.2712s; samplesPerSecond = 33987.9 -MPI Rank 2: Async gradient aggregation wait time: 0.009065 -MPI Rank 2: Actual gradient aggregation time: 0.029622 -MPI Rank 2: Async gradient aggregation wait time: 6e-06 -MPI Rank 2: Actual gradient aggregation time: 0.012457 -MPI Rank 2: 05/03/2016 18:17:59: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.88429973 * 10240; EvalErrorPrediction = 0.52099609 * 10240; time = 0.2740s; samplesPerSecond = 37366.5 -MPI Rank 2: Async gradient aggregation wait time: 0.006131 -MPI Rank 2: 05/03/2016 18:17:59: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.89248911 * 20480; EvalErrorPrediction = 0.51933594 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-05; epochTime=0.56187s -MPI Rank 2: 05/03/2016 18:17:59: CNTKCommandTrainEnd: speechTrain -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:17:59: Action "train" complete. -MPI Rank 2: -MPI Rank 2: 05/03/2016 18:18:00: __COMPLETED__ \ No newline at end of file +MPI Rank 2: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Async gradient aggregation wait time: 0.000481 +MPI Rank 2: Actual gradient aggregation time: 0.04614 +MPI Rank 2: Async gradient aggregation wait time: 0.011101 +MPI Rank 2: Actual gradient aggregation time: 0.039715 +MPI Rank 2: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: * 9216; CrossEntropyWithSoftmax = 1.90011431; EvalErrorPrediction = 0.51725260; TotalTime = 0.4844s; SamplesPerSecond = 19023.7 +MPI Rank 2: Async gradient aggregation wait time: 0.007438 +MPI Rank 2: Actual gradient aggregation time: 0.05382 +MPI Rank 2: Async gradient aggregation wait time: 7e-06 +MPI Rank 2: Actual gradient aggregation time: 0.052382 +MPI Rank 2: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: * 10240; CrossEntropyWithSoftmax = 1.88429973; EvalErrorPrediction = 0.52099609; TotalTime = 0.5637s; SamplesPerSecond = 18166.3 +MPI Rank 2: Async gradient aggregation wait time: 0.005346 +MPI Rank 2: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.89248911; EvalErrorPrediction = 0.51933594; learningRatePerSample = 9.7656251e-05; EpochTime=1.10805 +MPI Rank 2: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: __COMPLETED__ +MPI Rank 2: ~MPIWrapper diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt index 46c748c13..fdd2dd298 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt @@ -1,4 +1,4 @@ -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe 
configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr ------------------------------------------------------------------- Build info: @@ -67,12 +67,12 @@ ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded mpihelper: we are cog 1 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded +ping [requestnodes (after change)]: all 3 nodes responded +ping [mpihelper]: 3 nodes pinging each other +mpihelper: we are cog 2 in a gearbox of 3 mpihelper: we are cog 0 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded ping 
[mpihelper]: all 3 nodes responded @@ -91,32 +91,32 @@ job aborted: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe ended prematurely and may have crashed. exit code 0 ---- error analysis ----- -MPI Rank 0: 05/03/2016 14:26:13: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank0 -MPI Rank 0: 05/03/2016 14:26:13: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 14:26:13: Build info: +MPI Rank 0: 05/03/2016 14:22:38: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank0 +MPI Rank 0: 05/03/2016 14:22:38: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:22:38: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: Built time: May 3 2016 13:23:06 -MPI Rank 0: 05/03/2016 14:26:13: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 0: 05/03/2016 14:26:13: Build type: Release -MPI Rank 0: 05/03/2016 14:26:13: Build target: GPU -MPI Rank 0: 05/03/2016 14:26:13: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 14:26:13: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 05/03/2016 14:26:13: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 0: 05/03/2016 14:26:13: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 05/03/2016 14:26:13: Build Branch: HEAD -MPI Rank 0: 05/03/2016 14:26:13: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 0: 05/03/2016 14:26:13: Built by svcphil on LIANA-09-w -MPI Rank 0: 05/03/2016 14:26:13: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 05/03/2016 14:26:13: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:22:38: Built time: May 3 2016 13:23:06 +MPI Rank 0: 05/03/2016 14:22:38: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 0: 05/03/2016 14:22:38: Build type: Release +MPI Rank 0: 05/03/2016 14:22:38: Build target: GPU +MPI Rank 0: 05/03/2016 14:22:38: With 1bit-SGD: no +MPI Rank 0: 05/03/2016 14:22:38: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 0: 05/03/2016 14:22:38: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 0: 05/03/2016 14:22:38: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 0: 05/03/2016 14:22:38: Build Branch: HEAD +MPI Rank 0: 05/03/2016 14:22:38: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 0: 05/03/2016 14:22:38: Built by svcphil on LIANA-09-w +MPI Rank 0: 05/03/2016 14:22:38: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 0: 05/03/2016 14:22:38: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: Running on cntk-muc02 at 2016/05/03 14:26:13 -MPI Rank 0: 05/03/2016 14:26:13: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu 
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: 05/03/2016 14:22:38: Running on cntk-muc02 at 2016/05/03 14:22:38 +MPI Rank 0: 05/03/2016 14:22:38: Command line: +MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 14:26:13: precision = "float" +MPI Rank 0: 05/03/2016 14:22:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:22:38: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -206,14 +206,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: 
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=1 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -221,18 +219,18 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:22:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 14:26:13: precision = "float" +MPI Rank 0: 05/03/2016 14:22:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:22:38: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = -1 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -316,14 +314,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: DeviceId=-1 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=1 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -331,24 +327,24 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:22:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:22:38: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain MPI Rank 0: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:deviceId=-1 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = 
"C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = -1 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -430,35 +426,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 14:26:13: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 14:26:13: Commands: speechTrain -MPI Rank 0: 05/03/2016 14:26:13: Precision = "double" -MPI Rank 0: 05/03/2016 14:26:13: Using 1 CPU threads. -MPI Rank 0: 05/03/2016 14:26:13: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 14:26:13: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 0: 05/03/2016 14:26:13: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 0: 05/03/2016 14:22:38: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:22:38: Commands: speechTrain +MPI Rank 0: 05/03/2016 14:22:38: Precision = "double" +MPI Rank 0: 05/03/2016 14:22:38: Using 1 CPU threads. 
+MPI Rank 0: 05/03/2016 14:22:38: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 14:22:38: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 0: 05/03/2016 14:22:38: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: ############################################################################## -MPI Rank 0: 05/03/2016 14:26:13: # # -MPI Rank 0: 05/03/2016 14:26:13: # Action "train" # -MPI Rank 0: 05/03/2016 14:26:13: # # -MPI Rank 0: 05/03/2016 14:26:13: ############################################################################## +MPI Rank 0: 05/03/2016 14:22:38: ############################################################################## +MPI Rank 0: 05/03/2016 14:22:38: # # +MPI Rank 0: 05/03/2016 14:22:38: # Action "train" # +MPI Rank 0: 05/03/2016 14:22:38: # # +MPI Rank 0: 05/03/2016 14:22:38: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 14:22:38: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: Creating virgin network. +MPI Rank 0: 05/03/2016 14:22:39: Creating virgin network. MPI Rank 0: MPI Rank 0: Post-processing network... MPI Rank 0: @@ -510,14 +505,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: Created model with 25 nodes on CPU. +MPI Rank 0: 05/03/2016 14:22:39: Created model with 25 nodes on CPU. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: Training criterion node(s): -MPI Rank 0: 05/03/2016 14:26:13: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 14:22:39: Training criterion node(s): +MPI Rank 0: 05/03/2016 14:22:39: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 14:22:39: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 14:22:39: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -525,193 +520,199 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 000000D7522D9DF0: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 000000D7522D9E90: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 000000D7522DA070: {[B0 Value[512 x 1]] } -MPI Rank 0: 000000D7522DA110: {[W1 Value[512 x 512]] } -MPI Rank 0: 000000D7522DA1B0: {[B1 Value[512 x 1]] } -MPI Rank 0: 000000D7522DA610: {[W0 Value[512 x 363]] } -MPI Rank 0: 000000D7522DA890: {[features Value[363 x *]] } -MPI Rank 0: 000000D7522DDFA0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 0: 000000D7522DE680: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 000000D7522DE7C0: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 000000D7522DE860: {[W0*features Value[512 x *]] } -MPI Rank 0: 000000D7522DE900: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 000000D7522DE9A0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 000000D7522DEAE0: {[B2 Value[132 x 1]] } -MPI Rank 0: 000000D7522DEB80: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 000000D7522DEE00: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 000000D7522DEEA0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 000000D7522DEF40: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 000000D7522DEFE0: {[Prior Value[132]] } -MPI Rank 0: 000000D7522DF080: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 000000D7522DF300: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 000000D7522DF440: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 000000D7522DF620: {[labels Value[132 x *]] } -MPI Rank 0: 000000D7522DF8A0: {[W2 Value[132 x 512]] } -MPI Rank 0: 000000D7522DFA80: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 000000D7522DFB20: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 000000D7522DFC60: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 000000D7522DFE40: {[LogOfPrior Value[132]] } +MPI Rank 0: 000000D9777B2240: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 000000D9777B2380: {[W0 Value[512 x 363]] } +MPI Rank 0: 000000D9777B2420: {[B0 Value[512 x 1]] } +MPI Rank 0: 000000D9777B2740: {[features Value[363 x *]] } +MPI Rank 0: 000000D9777B27E0: {[InvStdOfFeatures Value[363]] } +MPI Rank 0: 000000D9777CB530: {[HLast Value[132 x 1 
x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 000000D9777CB5D0: {[B1 Value[512 x 1]] } +MPI Rank 0: 000000D9777CB850: {[W2 Value[132 x 512]] } +MPI Rank 0: 000000D9777CB8F0: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 000000D9777CBB70: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 000000D9777CBDF0: {[W0*features Value[512 x *]] } +MPI Rank 0: 000000D9777CC110: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 000000D9777CC250: {[W1 Value[512 x 512]] } +MPI Rank 0: 000000D9777CC430: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 000000D9777CC4D0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 000000D9777CC570: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 000000D9777CC610: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 000000D9777CC6B0: {[B2 Value[132 x 1]] } +MPI Rank 0: 000000D9777CC890: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 000000D9777CCB10: {[LogOfPrior Value[132]] } +MPI Rank 0: 000000D9777CCBB0: {[labels Value[132 x *]] } +MPI Rank 0: 000000D9777CCD90: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 000000D9777CCF70: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 000000D9777CD1F0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 000000D9777CD290: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 000000D9777CD330: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 000000D9777CD3D0: {[Prior Value[132]] } +MPI Rank 0: 000000D980AFB530: {[B2 Gradient[132 x 1]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 14:22:39: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:13: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 14:26:13: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 14:26:13: Prior = Mean() +MPI Rank 0: 05/03/2016 14:22:39: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 14:22:39: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 14:22:39: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:15: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 14:22:40: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:16: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 14:22:42: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:16: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 14:26:17: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.7869s; samplesPerSecond = 813.4 -MPI Rank 0: 05/03/2016 14:26:17: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.6436s; samplesPerSecond = 994.4 -MPI Rank 0: 05/03/2016 14:26:18: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6624s; samplesPerSecond = 966.2 -MPI Rank 0: 05/03/2016 14:26:18: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6622s; samplesPerSecond = 966.5 -MPI Rank 0: 05/03/2016 14:26:19: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6651s; samplesPerSecond = 962.3 -MPI Rank 0: 05/03/2016 14:26:20: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.6858s; samplesPerSecond = 933.3 -MPI Rank 0: 05/03/2016 14:26:20: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.6218s; samplesPerSecond = 1029.3 -MPI Rank 0: 05/03/2016 14:26:21: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.6855s; samplesPerSecond = 933.6 -MPI Rank 0: 05/03/2016 14:26:22: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.7050s; samplesPerSecond = 907.8 -MPI Rank 0: 05/03/2016 14:26:23: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.7183s; samplesPerSecond = 891.0 -MPI Rank 0: 05/03/2016 14:26:23: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.6391s; samplesPerSecond = 1001.5 -MPI Rank 0: 05/03/2016 14:26:24: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.6413s; samplesPerSecond = 998.0 -MPI Rank 0: 05/03/2016 14:26:25: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.6751s; samplesPerSecond = 948.0 -MPI Rank 0: 05/03/2016 14:26:25: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.6451s; samplesPerSecond = 992.1 -MPI Rank 0: 05/03/2016 14:26:26: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.6426s; samplesPerSecond = 996.0 -MPI Rank 0: 05/03/2016 14:26:26: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.6614s; samplesPerSecond = 967.6 -MPI Rank 0: 05/03/2016 14:26:27: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.6606s; samplesPerSecond = 968.8 -MPI Rank 0: 05/03/2016 14:26:28: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.6897s; samplesPerSecond = 927.9 -MPI Rank 0: 05/03/2016 14:26:28: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6445s; samplesPerSecond = 993.1 -MPI Rank 0: 05/03/2016 14:26:29: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.6575s; samplesPerSecond = 973.4 -MPI Rank 0: 05/03/2016 14:26:30: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.6434s; samplesPerSecond = 994.8 -MPI Rank 0: 05/03/2016 14:26:30: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6639s; samplesPerSecond = 964.0 -MPI Rank 0: 05/03/2016 14:26:31: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.6844s; samplesPerSecond = 935.1 -MPI Rank 0: 05/03/2016 14:26:32: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.5747s; samplesPerSecond = 1113.6 -MPI Rank 0: 05/03/2016 14:26:32: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.6221s; samplesPerSecond = 1028.7 -MPI Rank 0: 05/03/2016 14:26:33: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6428s; samplesPerSecond = 995.6 -MPI Rank 0: 05/03/2016 14:26:34: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.6671s; samplesPerSecond = 959.4 -MPI Rank 0: 05/03/2016 14:26:34: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.7081s; samplesPerSecond = 903.8 -MPI Rank 0: 05/03/2016 14:26:35: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.6845s; samplesPerSecond = 934.9 -MPI Rank 0: 05/03/2016 14:26:36: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6159s; samplesPerSecond = 1039.1 -MPI Rank 0: 05/03/2016 14:26:36: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6705s; samplesPerSecond = 954.4 -MPI Rank 0: 05/03/2016 14:26:37: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.5216s; samplesPerSecond = 1227.1 -MPI Rank 0: 05/03/2016 14:26:37: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=21.1766s -MPI Rank 0: 05/03/2016 14:26:37: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 14:22:42: Starting minibatch loop. 
+MPI Rank 0: 05/03/2016 14:22:42: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.6498s; samplesPerSecond = 985.0 +MPI Rank 0: 05/03/2016 14:22:43: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.6726s; samplesPerSecond = 951.6 +MPI Rank 0: 05/03/2016 14:22:44: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6738s; samplesPerSecond = 949.8 +MPI Rank 0: 05/03/2016 14:22:44: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6704s; samplesPerSecond = 954.6 +MPI Rank 0: 05/03/2016 14:22:45: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6946s; samplesPerSecond = 921.4 +MPI Rank 0: 05/03/2016 14:22:46: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.6035s; samplesPerSecond = 1060.5 +MPI Rank 0: 05/03/2016 14:22:46: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.6447s; samplesPerSecond = 992.7 +MPI Rank 0: 05/03/2016 14:22:47: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.6705s; samplesPerSecond = 954.5 +MPI Rank 0: 05/03/2016 14:22:47: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.6241s; samplesPerSecond = 1025.5 +MPI Rank 0: 05/03/2016 14:22:48: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.6694s; samplesPerSecond = 956.1 +MPI Rank 0: 05/03/2016 14:22:49: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.7142s; samplesPerSecond = 896.1 +MPI Rank 0: 05/03/2016 14:22:50: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.7367s; samplesPerSecond = 868.7 +MPI Rank 0: 05/03/2016 14:22:50: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.6461s; samplesPerSecond = 990.5 +MPI Rank 0: 05/03/2016 14:22:51: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.6463s; samplesPerSecond = 990.2 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.6899s; samplesPerSecond = 927.7 +MPI Rank 0: 05/03/2016 14:22:52: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.6673s; samplesPerSecond = 959.1 +MPI Rank 0: 05/03/2016 14:22:53: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.7026s; samplesPerSecond = 910.9 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.6898s; samplesPerSecond = 927.8 +MPI Rank 0: 05/03/2016 14:22:54: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6885s; samplesPerSecond = 929.6 +MPI Rank 0: 05/03/2016 14:22:55: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.6903s; samplesPerSecond = 927.1 +MPI Rank 0: 05/03/2016 14:22:56: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.6721s; samplesPerSecond = 952.2 +MPI Rank 0: 05/03/2016 14:22:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6713s; samplesPerSecond = 953.4 +MPI Rank 0: 05/03/2016 14:22:57: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.6651s; samplesPerSecond = 962.3 +MPI Rank 0: 05/03/2016 14:22:58: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.7173s; samplesPerSecond = 892.3 +MPI Rank 0: 05/03/2016 14:22:58: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.6812s; samplesPerSecond = 939.5 +MPI Rank 0: 05/03/2016 14:22:59: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6004s; samplesPerSecond = 1065.9 +MPI Rank 0: 05/03/2016 14:23:00: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.7285s; samplesPerSecond = 878.5 +MPI Rank 0: 05/03/2016 14:23:00: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.6881s; samplesPerSecond = 930.1 +MPI Rank 0: 05/03/2016 14:23:01: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.6026s; samplesPerSecond = 1062.1 +MPI Rank 0: 05/03/2016 14:23:02: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6955s; samplesPerSecond = 920.2 +MPI Rank 0: 05/03/2016 14:23:02: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6723s; samplesPerSecond = 951.9 +MPI Rank 0: 05/03/2016 14:23:03: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.5298s; samplesPerSecond = 1208.0 +MPI Rank 0: 05/03/2016 14:23:03: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=21.3811s +MPI Rank 0: 05/03/2016 14:23:03: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 
0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 14:23:03: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:37: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.015954 +MPI Rank 0: 05/03/2016 14:23:03: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Actual gradient aggregation time: 0.02506 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.008828 -MPI Rank 0: 05/03/2016 14:26:38: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13223658 * 2304; EvalErrorPrediction = 0.57725694 * 2304; time = 0.8632s; samplesPerSecond = 2669.3 +MPI Rank 0: Actual gradient aggregation time: 0.007708 +MPI Rank 0: 05/03/2016 14:23:04: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13223658 * 2304; EvalErrorPrediction = 0.57725694 * 2304; time = 0.9063s; samplesPerSecond = 2542.1 MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.009038 +MPI Rank 0: Actual gradient aggregation time: 0.00979 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.007919 +MPI Rank 0: 05/03/2016 14:23:05: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.12726818 * 2560; EvalErrorPrediction = 0.59375000 * 2560; time = 0.8658s; samplesPerSecond = 2956.9 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.007772 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.006283 -MPI Rank 0: 05/03/2016 14:26:39: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.12726818 * 2560; EvalErrorPrediction = 0.59375000 * 2560; time = 0.8871s; samplesPerSecond = 2885.9 +MPI Rank 0: Actual gradient aggregation time: 0.012333 +MPI Rank 0: 05/03/2016 14:23:06: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18923682 * 2560; EvalErrorPrediction = 0.58437500 * 2560; time = 0.8650s; samplesPerSecond = 2959.6 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.013184 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.006567 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.033361 -MPI Rank 0: 05/03/2016 14:26:40: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18923682 * 2560; EvalErrorPrediction = 0.58437500 * 2560; time = 0.8711s; samplesPerSecond = 2939.0 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.034582 +MPI Rank 0: Actual gradient aggregation time: 0.007769 +MPI Rank 0: 05/03/2016 14:23:07: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.11532284 * 2560; EvalErrorPrediction = 0.57968750 * 2560; time = 0.8701s; 
samplesPerSecond = 2942.3 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.035211 -MPI Rank 0: 05/03/2016 14:26:41: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.11532284 * 2560; EvalErrorPrediction = 0.57968750 * 2560; time = 0.9223s; samplesPerSecond = 2775.7 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.008541 +MPI Rank 0: Actual gradient aggregation time: 0.013592 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.00853 -MPI Rank 0: 05/03/2016 14:26:42: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01746278 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.8845s; samplesPerSecond = 2894.2 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.008017 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.007908 -MPI Rank 0: 05/03/2016 14:26:43: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06105301 * 2560; EvalErrorPrediction = 0.57617188 * 2560; time = 0.8946s; samplesPerSecond = 2861.8 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.008226 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.008146 -MPI Rank 0: 05/03/2016 14:26:44: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.12283592 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.8934s; samplesPerSecond = 2865.5 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.008149 +MPI Rank 0: Actual gradient aggregation time: 0.007823 +MPI Rank 0: 05/03/2016 14:23:08: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01746278 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.8656s; samplesPerSecond = 2957.4 MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.007957 -MPI Rank 0: 05/03/2016 14:26:44: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06473750 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.8461s; samplesPerSecond = 3025.7 -MPI Rank 0: Async gradient aggregation wait time: 0.008626 -MPI Rank 0: Actual gradient aggregation time: 0.007623 -MPI Rank 0: 05/03/2016 14:26:44: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10367019 * 20480; EvalErrorPrediction = 0.57451172 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.08817s -MPI Rank 0: 05/03/2016 14:26:44: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.2' +MPI Rank 0: Actual gradient aggregation time: 0.013979 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.053962 +MPI Rank 0: 05/03/2016 14:23:08: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06105301 * 2560; EvalErrorPrediction = 0.57617188 * 2560; time = 0.7879s; samplesPerSecond = 3249.2 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.063286 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 
+MPI Rank 0: Actual gradient aggregation time: 0.064054 +MPI Rank 0: 05/03/2016 14:23:09: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.12283592 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.9321s; samplesPerSecond = 2746.6 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.048655 +MPI Rank 0: Async gradient aggregation wait time: 1e-006 +MPI Rank 0: Actual gradient aggregation time: 0.063976 +MPI Rank 0: 05/03/2016 14:23:10: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06473750 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.8484s; samplesPerSecond = 3017.3 +MPI Rank 0: Async gradient aggregation wait time: 0.040906 +MPI Rank 0: Actual gradient aggregation time: 0.007657 +MPI Rank 0: 05/03/2016 14:23:10: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10367019 * 20480; EvalErrorPrediction = 0.57451172 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=6.99722s +MPI Rank 0: 05/03/2016 14:23:10: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:44: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:23:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:44: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 14:23:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 0: Async gradient aggregation wait time: 0.09624 +MPI Rank 0: Actual gradient aggregation time: 0.243542 +MPI Rank 0: Async gradient aggregation wait time: 0.064816 +MPI Rank 0: Actual gradient aggregation time: 0.308121 +MPI Rank 0: 05/03/2016 14:23:13: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04372754 * 9216; EvalErrorPrediction = 0.54318576 * 9216; time = 3.0835s; samplesPerSecond = 2988.9 +MPI Rank 0: Async gradient aggregation wait time: 0.1223 +MPI Rank 0: Actual gradient aggregation time: 0.295289 +MPI Rank 0: Async gradient aggregation wait time: 0.129929 +MPI Rank 0: Actual gradient aggregation time: 0.139532 +MPI Rank 0: 05/03/2016 14:23:16: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94811890 * 10240; EvalErrorPrediction = 0.52695313 * 10240; time = 3.0472s; samplesPerSecond = 3360.5 +MPI Rank 0: 05/03/2016 14:23:16: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98973403 * 20480; EvalErrorPrediction = 0.53388672 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.19742s +MPI Rank 0: 05/03/2016 14:23:16: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.3' +MPI Rank 0: +MPI Rank 0: 05/03/2016 14:23:17: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses +MPI Rank 0: +MPI Rank 0: 05/03/2016 14:23:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.202478 -MPI Rank 0: Async gradient aggregation wait time: 0.069043 -MPI Rank 0: Actual gradient aggregation time: 0.304059 -MPI Rank 0: 05/03/2016 14:26:47: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04372754 * 9216; EvalErrorPrediction = 0.54318576 * 9216; time = 2.9198s; samplesPerSecond = 3156.4 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.215409 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.207205 -MPI Rank 0: 05/03/2016 14:26:50: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94811890 * 10240; EvalErrorPrediction = 0.52695313 * 10240; time = 3.0395s; samplesPerSecond = 3369.0 -MPI Rank 0: 05/03/2016 14:26:51: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98973403 * 20480; EvalErrorPrediction = 0.53388672 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.10977s -MPI Rank 0: 05/03/2016 14:26:51: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.3' -MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:51: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:51: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.235952 -MPI Rank 0: Async gradient aggregation wait time: 0.126071 -MPI Rank 0: Actual gradient aggregation time: 0.30028 -MPI Rank 0: 05/03/2016 14:26:54: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89718928 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 3.0316s; samplesPerSecond = 3040.0 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.169292 +MPI Rank 0: Actual gradient aggregation time: 0.227833 MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.171773 -MPI Rank 0: 05/03/2016 14:26:57: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.88234725 * 10240; EvalErrorPrediction = 0.51093750 * 10240; time = 3.0035s; samplesPerSecond = 3409.3 -MPI Rank 0: Async gradient aggregation wait time: 0.012767 -MPI Rank 0: 05/03/2016 14:26:57: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88941123 * 20480; EvalErrorPrediction = 0.51376953 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.1162s -MPI Rank 0: 05/03/2016 14:26:57: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 14:26:57: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: Actual gradient aggregation time: 0.253637 +MPI Rank 0: 05/03/2016 14:23:19: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89718928 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 2.9162s; 
samplesPerSecond = 3160.3 +MPI Rank 0: Async gradient aggregation wait time: 2e-006 +MPI Rank 0: Actual gradient aggregation time: 0.10078 +MPI Rank 0: Async gradient aggregation wait time: 3e-006 +MPI Rank 0: Actual gradient aggregation time: 0.130631 +MPI Rank 0: 05/03/2016 14:23:23: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.88234725 * 10240; EvalErrorPrediction = 0.51093750 * 10240; time = 3.1254s; samplesPerSecond = 3276.4 +MPI Rank 0: Async gradient aggregation wait time: 0.016134 +MPI Rank 0: 05/03/2016 14:23:23: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88941123 * 20480; EvalErrorPrediction = 0.51376953 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.13278s +MPI Rank 0: 05/03/2016 14:23:23: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 14:23:23: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:57: Action "train" complete. +MPI Rank 0: 05/03/2016 14:23:23: Action "train" complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:57: __COMPLETED__ -MPI Rank 1: 05/03/2016 14:26:13: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank1 -MPI Rank 1: 05/03/2016 14:26:13: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 14:26:13: Build info: +MPI Rank 0: 05/03/2016 14:23:23: __COMPLETED__ +MPI Rank 1: 05/03/2016 14:22:39: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank1 +MPI Rank 1: 05/03/2016 14:22:39: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:22:39: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:13: Built time: May 3 2016 13:23:06 -MPI Rank 1: 05/03/2016 14:26:13: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 1: 05/03/2016 14:26:13: Build type: Release -MPI Rank 1: 05/03/2016 14:26:13: Build target: GPU -MPI Rank 1: 05/03/2016 14:26:13: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 14:26:13: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 05/03/2016 14:26:13: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 1: 05/03/2016 14:26:13: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 05/03/2016 14:26:13: Build Branch: HEAD -MPI Rank 1: 05/03/2016 14:26:13: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 1: 05/03/2016 14:26:13: Built by svcphil on LIANA-09-w -MPI Rank 1: 05/03/2016 14:26:13: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 05/03/2016 14:26:13: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:22:39: Built time: May 3 2016 13:23:06 +MPI Rank 1: 05/03/2016 14:22:39: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 1: 05/03/2016 14:22:39: Build type: Release +MPI Rank 1: 05/03/2016 14:22:39: Build target: GPU +MPI Rank 1: 05/03/2016 14:22:39: With 1bit-SGD: no +MPI Rank 1: 05/03/2016 14:22:39: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 1: 05/03/2016 14:22:39: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 1: 05/03/2016 14:22:39: 
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 1: 05/03/2016 14:22:39: Build Branch: HEAD +MPI Rank 1: 05/03/2016 14:22:39: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 1: 05/03/2016 14:22:39: Built by svcphil on LIANA-09-w +MPI Rank 1: 05/03/2016 14:22:39: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 1: 05/03/2016 14:22:39: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:13: Running on cntk-muc02 at 2016/05/03 14:26:13 -MPI Rank 1: 05/03/2016 14:26:13: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: 05/03/2016 14:22:39: Running on cntk-muc02 at 2016/05/03 14:22:39 +MPI Rank 1: 05/03/2016 14:22:39: Command line: +MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:13: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) 
>>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 14:26:13: precision = "float" +MPI Rank 1: 05/03/2016 14:22:39: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:22:39: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -801,14 +802,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=1 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -816,18 +815,18 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:13: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:22:39: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:13: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 14:26:13: precision = "float" +MPI Rank 1: 05/03/2016 14:22:39: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:22:39: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = -1 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: 
deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -911,14 +910,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: DeviceId=-1 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=1 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -926,24 +923,24 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:13: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:22:39: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:13: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:22:39: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:deviceId=-1 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: configparameters: 
cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = -1 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -1025,35 +1022,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 14:26:13: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 14:26:13: Commands: speechTrain -MPI Rank 1: 05/03/2016 14:26:13: Precision = "double" -MPI Rank 1: 05/03/2016 14:26:13: Using 1 CPU threads. -MPI Rank 1: 05/03/2016 14:26:13: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 14:26:13: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 1: 05/03/2016 14:26:13: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 1: 05/03/2016 14:22:39: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:22:39: Commands: speechTrain +MPI Rank 1: 05/03/2016 14:22:39: Precision = "double" +MPI Rank 1: 05/03/2016 14:22:39: Using 1 CPU threads. 
+MPI Rank 1: 05/03/2016 14:22:39: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 14:22:39: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 1: 05/03/2016 14:22:39: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:13: ############################################################################## -MPI Rank 1: 05/03/2016 14:26:13: # # -MPI Rank 1: 05/03/2016 14:26:13: # Action "train" # -MPI Rank 1: 05/03/2016 14:26:13: # # -MPI Rank 1: 05/03/2016 14:26:13: ############################################################################## +MPI Rank 1: 05/03/2016 14:22:39: ############################################################################## +MPI Rank 1: 05/03/2016 14:22:39: # # +MPI Rank 1: 05/03/2016 14:22:39: # Action "train" # +MPI Rank 1: 05/03/2016 14:22:39: # # +MPI Rank 1: 05/03/2016 14:22:39: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:13: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 14:22:39: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:14: Creating virgin network. +MPI Rank 1: 05/03/2016 14:22:39: Creating virgin network. MPI Rank 1: MPI Rank 1: Post-processing network... MPI Rank 1: @@ -1105,14 +1101,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:14: Created model with 25 nodes on CPU. +MPI Rank 1: 05/03/2016 14:22:39: Created model with 25 nodes on CPU. 
MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:14: Training criterion node(s): -MPI Rank 1: 05/03/2016 14:26:14: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 14:22:39: Training criterion node(s): +MPI Rank 1: 05/03/2016 14:22:39: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:14: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 14:22:39: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:14: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 14:22:39: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1120,189 +1116,195 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 000000A3BAC0F120: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 000000A3BAC0F1C0: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 000000A3BAC0F260: {[features Value[363 x *]] } -MPI Rank 1: 000000A3BAC0F440: {[W0 Value[512 x 363]] } -MPI Rank 1: 000000A3BAC397F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 000000A3BAC3A5B0: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 1: 000000A3BAC3A8D0: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 000000A3BAC3A970: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 000000A3BACA5B30: {[labels Value[132 x *]] } -MPI Rank 1: 000000A3BACA5DB0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 000000A3BACA5E50: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 000000A3BACA6030: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 000000A3BACA60D0: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 000000A3BACA6210: {[B2 Value[132 x 1]] } -MPI Rank 1: 000000A3BACA63F0: {[W2 Value[132 x 512]] } -MPI Rank 1: 000000A3BACA6490: {[B1 Value[512 x 1]] } -MPI Rank 1: 000000A3BACA67B0: {[B0 Value[512 x 1]] } -MPI Rank 1: 000000A3BACA6990: {[Prior Value[132]] } -MPI Rank 1: 000000A3BACA6D50: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 000000A3BACA6DF0: {[W0*features Value[512 x *]] } -MPI Rank 1: 000000A3BACA6E90: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 000000A3BACA6F30: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 000000A3BACA7110: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 000000A3BACA72F0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 000000A3BACA7390: {[W1 Value[512 x 512]] } -MPI Rank 1: 000000A3BACA7430: {[LogOfPrior Value[132]] } -MPI Rank 1: 000000A3BACA7750: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 000000A3BACA77F0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 000000ED173AFBF0: {[features Value[363 x *]] } +MPI Rank 1: 000000ED173AFD30: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 000000ED173B0050: {[W0 Value[512 x 363]] } +MPI Rank 1: 000000ED173B0370: {[B0 Value[512 x 1]] } +MPI Rank 1: 000000ED173B0690: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 000000ED173C7050: {[Prior Value[132]] 
} +MPI Rank 1: 000000ED173C7190: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 000000ED173C7410: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 000000ED173C7550: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 000000ED173C75F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 000000ED173C7690: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 000000ED173C7B90: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 000000ED173C7CD0: {[B2 Value[132 x 1]] } +MPI Rank 1: 000000ED173C7E10: {[W1 Value[512 x 512]] } +MPI Rank 1: 000000ED173C7EB0: {[B1 Value[512 x 1]] } +MPI Rank 1: 000000ED173C8310: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 000000ED173C83B0: {[labels Value[132 x *]] } +MPI Rank 1: 000000ED173C8450: {[W0*features Value[512 x *]] } +MPI Rank 1: 000000ED173C8590: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 000000ED173C8630: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 000000ED173C8770: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 000000ED173C8810: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 000000ED173C88B0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 000000ED173C8950: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 000000ED173C8A90: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 000000ED173C8DB0: {[W2 Value[132 x 512]] } +MPI Rank 1: 000000ED173C8E50: {[LogOfPrior Value[132]] } +MPI Rank 1: 000000ED20631AC0: {[B2 Gradient[132 x 1]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:14: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 14:22:39: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:14: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 14:26:14: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 14:26:14: Prior = Mean() +MPI Rank 1: 05/03/2016 14:22:39: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 14:22:39: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 14:22:39: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:15: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 14:22:41: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:16: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 14:22:42: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:16: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 14:26:16: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.7638s; samplesPerSecond = 837.9 -MPI Rank 1: 05/03/2016 14:26:17: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.6436s; samplesPerSecond = 994.4 -MPI Rank 1: 05/03/2016 14:26:18: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6406s; samplesPerSecond = 999.1 -MPI Rank 1: 05/03/2016 14:26:18: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6622s; samplesPerSecond = 966.5 -MPI Rank 1: 05/03/2016 14:26:19: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6653s; samplesPerSecond = 961.9 -MPI Rank 1: 05/03/2016 14:26:20: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.6191s; samplesPerSecond = 1033.7 -MPI Rank 1: 05/03/2016 14:26:20: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.7107s; samplesPerSecond = 900.5 -MPI Rank 1: 05/03/2016 14:26:21: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.6400s; samplesPerSecond = 1000.0 -MPI Rank 1: 05/03/2016 14:26:22: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.5958s; samplesPerSecond = 1074.2 -MPI Rank 1: 05/03/2016 14:26:22: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.7414s; samplesPerSecond = 863.3 -MPI Rank 1: 05/03/2016 14:26:23: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.6153s; samplesPerSecond = 1040.1 -MPI Rank 1: 05/03/2016 14:26:24: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.6418s; samplesPerSecond = 997.2 -MPI Rank 1: 05/03/2016 14:26:24: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.6736s; samplesPerSecond = 950.2 -MPI Rank 1: 05/03/2016 14:26:25: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.6680s; samplesPerSecond = 958.1 -MPI Rank 1: 05/03/2016 14:26:26: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.6884s; samplesPerSecond = 929.7 -MPI Rank 1: 05/03/2016 14:26:26: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.6608s; samplesPerSecond = 968.5 -MPI Rank 1: 05/03/2016 14:26:27: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.6605s; samplesPerSecond = 969.0 -MPI Rank 1: 05/03/2016 14:26:28: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.5771s; samplesPerSecond = 1109.0 -MPI Rank 1: 05/03/2016 14:26:28: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6469s; samplesPerSecond = 989.4 -MPI Rank 1: 05/03/2016 14:26:29: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.6442s; samplesPerSecond = 993.5 -MPI Rank 1: 05/03/2016 14:26:30: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.6543s; samplesPerSecond = 978.1 -MPI Rank 1: 05/03/2016 14:26:30: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.5769s; samplesPerSecond = 1109.5 -MPI Rank 1: 05/03/2016 14:26:31: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.6405s; samplesPerSecond = 999.2 -MPI Rank 1: 05/03/2016 14:26:31: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.7073s; samplesPerSecond = 904.9 -MPI Rank 1: 05/03/2016 14:26:32: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.6199s; samplesPerSecond = 1032.4 -MPI Rank 1: 05/03/2016 14:26:33: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6452s; samplesPerSecond = 991.9 -MPI Rank 1: 05/03/2016 14:26:33: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.6669s; samplesPerSecond = 959.6 -MPI Rank 1: 05/03/2016 14:26:34: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.6182s; samplesPerSecond = 1035.3 -MPI Rank 1: 05/03/2016 14:26:35: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.6433s; samplesPerSecond = 994.9 -MPI Rank 1: 05/03/2016 14:26:35: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6161s; samplesPerSecond = 1038.8 -MPI Rank 1: 05/03/2016 14:26:36: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.5989s; samplesPerSecond = 1068.6 -MPI Rank 1: 05/03/2016 14:26:37: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.6446s; samplesPerSecond = 992.9 -MPI Rank 1: 05/03/2016 14:26:37: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=20.876s +MPI Rank 1: 05/03/2016 14:22:42: Starting minibatch loop. 
+MPI Rank 1: 05/03/2016 14:22:42: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.6968s; samplesPerSecond = 918.5 +MPI Rank 1: 05/03/2016 14:22:43: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.6757s; samplesPerSecond = 947.2 +MPI Rank 1: 05/03/2016 14:22:44: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6714s; samplesPerSecond = 953.2 +MPI Rank 1: 05/03/2016 14:22:44: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6472s; samplesPerSecond = 988.8 +MPI Rank 1: 05/03/2016 14:22:45: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6489s; samplesPerSecond = 986.3 +MPI Rank 1: 05/03/2016 14:22:46: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.6716s; samplesPerSecond = 952.9 +MPI Rank 1: 05/03/2016 14:22:46: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.7764s; samplesPerSecond = 824.3 +MPI Rank 1: 05/03/2016 14:22:47: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.6276s; samplesPerSecond = 1019.8 +MPI Rank 1: 05/03/2016 14:22:48: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.6684s; samplesPerSecond = 957.5 +MPI Rank 1: 05/03/2016 14:22:48: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.6698s; samplesPerSecond = 955.5 +MPI Rank 1: 05/03/2016 14:22:49: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.6253s; samplesPerSecond = 1023.5 +MPI Rank 1: 05/03/2016 14:22:50: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.6244s; samplesPerSecond = 1025.0 +MPI Rank 1: 05/03/2016 14:22:50: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.6457s; samplesPerSecond = 991.2 +MPI Rank 1: 05/03/2016 14:22:51: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.7141s; samplesPerSecond = 896.2 +MPI Rank 1: 05/03/2016 14:22:51: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.5560s; samplesPerSecond = 1151.1 +MPI Rank 1: 05/03/2016 14:22:52: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.6676s; samplesPerSecond = 958.7 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.5668s; samplesPerSecond = 1129.1 +MPI Rank 1: 05/03/2016 14:22:53: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.6920s; samplesPerSecond = 924.9 +MPI Rank 1: 05/03/2016 14:22:54: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6198s; samplesPerSecond = 1032.6 +MPI Rank 1: 05/03/2016 14:22:55: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.6674s; samplesPerSecond = 959.0 +MPI Rank 1: 05/03/2016 14:22:55: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.5590s; samplesPerSecond = 1144.9 +MPI Rank 1: 05/03/2016 14:22:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6502s; samplesPerSecond = 984.3 +MPI Rank 1: 05/03/2016 14:22:57: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.6702s; samplesPerSecond = 954.9 +MPI Rank 1: 05/03/2016 14:22:57: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.6889s; samplesPerSecond = 929.0 +MPI Rank 1: 05/03/2016 14:22:58: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.6278s; samplesPerSecond = 1019.5 +MPI Rank 1: 05/03/2016 14:22:59: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.7472s; samplesPerSecond = 856.6 +MPI Rank 1: 05/03/2016 14:22:59: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.6689s; samplesPerSecond = 956.7 +MPI Rank 1: 05/03/2016 14:23:00: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.6796s; samplesPerSecond = 941.7 +MPI Rank 1: 05/03/2016 14:23:01: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.7121s; samplesPerSecond = 898.8 +MPI Rank 1: 05/03/2016 14:23:01: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6061s; samplesPerSecond = 1055.9 +MPI Rank 1: 05/03/2016 14:23:02: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6937s; samplesPerSecond = 922.6 +MPI Rank 1: 05/03/2016 14:23:03: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.6720s; samplesPerSecond = 952.4 +MPI Rank 1: 05/03/2016 14:23:03: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=21.1203s MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 14:23:03: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 
minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:37: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.046476 +MPI Rank 1: 05/03/2016 14:23:03: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Actual gradient aggregation time: 0.074381 +MPI Rank 1: Async gradient aggregation wait time: 0.027508 +MPI Rank 1: Actual gradient aggregation time: 0.090669 +MPI Rank 1: 05/03/2016 14:23:04: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13223658 * 2304; EvalErrorPrediction = 0.57725694 * 2304; time = 0.8431s; samplesPerSecond = 2732.7 +MPI Rank 1: Async gradient aggregation wait time: 0.04419 +MPI Rank 1: Actual gradient aggregation time: 0.083229 +MPI Rank 1: Async gradient aggregation wait time: 0.038712 +MPI Rank 1: Actual gradient aggregation time: 0.082098 +MPI Rank 1: 05/03/2016 14:23:05: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.12726818 * 2560; EvalErrorPrediction = 0.59375000 * 2560; time = 0.8817s; samplesPerSecond = 2903.3 +MPI Rank 1: Async gradient aggregation wait time: 0.036811 +MPI Rank 1: Actual gradient aggregation time: 0.078753 +MPI Rank 1: Async gradient aggregation wait time: 0.046747 +MPI Rank 1: Actual gradient aggregation time: 0.083512 +MPI Rank 1: 05/03/2016 14:23:06: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18923682 * 2560; EvalErrorPrediction = 0.58437500 * 2560; time = 0.8638s; samplesPerSecond = 2963.8 +MPI Rank 1: Async gradient aggregation wait time: 0.048791 +MPI Rank 1: Actual gradient aggregation time: 0.082547 +MPI Rank 1: Async gradient aggregation wait time: 0.035459 +MPI Rank 1: Actual gradient aggregation time: 0.076352 +MPI Rank 1: 05/03/2016 14:23:07: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.11532284 * 2560; EvalErrorPrediction = 0.57968750 * 2560; time = 0.8718s; samplesPerSecond = 2936.3 +MPI Rank 1: Async gradient aggregation wait time: 0.043805 +MPI Rank 1: Actual gradient aggregation time: 0.085567 +MPI Rank 1: Async gradient aggregation wait time: 0.048436 +MPI Rank 1: Actual gradient aggregation time: 0.076548 +MPI Rank 1: 05/03/2016 14:23:07: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01746278 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.8678s; samplesPerSecond = 2949.8 +MPI Rank 1: Async gradient aggregation wait time: 0.040026 +MPI Rank 1: Actual gradient aggregation time: 0.095852 +MPI Rank 1: Async gradient aggregation wait time: 0.005412 +MPI Rank 1: Actual gradient aggregation time: 0.081784 +MPI Rank 1: 05/03/2016 14:23:08: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06105301 * 2560; EvalErrorPrediction = 0.57617188 * 2560; time = 0.8877s; samplesPerSecond = 2884.0 MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.042421 -MPI Rank 1: 05/03/2016 14:26:38: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13223658 * 2304; EvalErrorPrediction = 0.57725694 * 2304; time = 0.8628s; samplesPerSecond = 2670.4 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI 
Rank 1: Actual gradient aggregation time: 0.037907 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.059053 -MPI Rank 1: 05/03/2016 14:26:39: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.12726818 * 2560; EvalErrorPrediction = 0.59375000 * 2560; time = 0.8514s; samplesPerSecond = 3006.8 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.062694 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.067824 -MPI Rank 1: 05/03/2016 14:26:40: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18923682 * 2560; EvalErrorPrediction = 0.58437500 * 2560; time = 0.9093s; samplesPerSecond = 2815.4 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.066544 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.067812 -MPI Rank 1: 05/03/2016 14:26:41: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.11532284 * 2560; EvalErrorPrediction = 0.57968750 * 2560; time = 0.8979s; samplesPerSecond = 2851.1 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.059383 +MPI Rank 1: Actual gradient aggregation time: 0.032265 MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.061913 -MPI Rank 1: 05/03/2016 14:26:42: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01746278 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.8650s; samplesPerSecond = 2959.6 -MPI Rank 1: Async gradient aggregation wait time: 0.026139 -MPI Rank 1: Actual gradient aggregation time: 0.0891 -MPI Rank 1: Async gradient aggregation wait time: 0.037177 -MPI Rank 1: Actual gradient aggregation time: 0.078903 -MPI Rank 1: 05/03/2016 14:26:43: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06105301 * 2560; EvalErrorPrediction = 0.57617188 * 2560; time = 0.9017s; samplesPerSecond = 2839.2 -MPI Rank 1: Async gradient aggregation wait time: 0.0469 -MPI Rank 1: Actual gradient aggregation time: 0.079612 -MPI Rank 1: Async gradient aggregation wait time: 0.036695 -MPI Rank 1: Actual gradient aggregation time: 0.075676 -MPI Rank 1: 05/03/2016 14:26:43: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.12283592 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.8945s; samplesPerSecond = 2862.0 -MPI Rank 1: Async gradient aggregation wait time: 0.034725 -MPI Rank 1: Actual gradient aggregation time: 0.077876 -MPI Rank 1: Async gradient aggregation wait time: 0.035226 -MPI Rank 1: Actual gradient aggregation time: 0.078808 -MPI Rank 1: 05/03/2016 14:26:44: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06473750 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.8935s; samplesPerSecond = 2865.1 -MPI Rank 1: Async gradient aggregation wait time: 0.02817 -MPI Rank 1: Actual gradient aggregation time: 0.010008 -MPI Rank 1: 05/03/2016 14:26:44: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10367019 * 20480; EvalErrorPrediction = 0.57451172 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.11981s -MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:44: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 
samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:45: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.093525 +MPI Rank 1: Actual gradient aggregation time: 0.031353 +MPI Rank 1: 05/03/2016 14:23:09: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.12283592 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.8709s; samplesPerSecond = 2939.5 MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.174809 -MPI Rank 1: 05/03/2016 14:26:47: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04372754 * 9216; EvalErrorPrediction = 0.54318576 * 9216; time = 2.9931s; samplesPerSecond = 3079.1 +MPI Rank 1: Actual gradient aggregation time: 0.078363 MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.104337 +MPI Rank 1: Actual gradient aggregation time: 0.03218 +MPI Rank 1: 05/03/2016 14:23:10: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06473750 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.9110s; samplesPerSecond = 2810.0 +MPI Rank 1: Async gradient aggregation wait time: 0.006199 +MPI Rank 1: Actual gradient aggregation time: 0.010001 +MPI Rank 1: 05/03/2016 14:23:10: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10367019 * 20480; EvalErrorPrediction = 0.57451172 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.02014s +MPI Rank 1: +MPI Rank 1: 05/03/2016 14:23:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses +MPI Rank 1: +MPI Rank 1: 05/03/2016 14:23:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 1: Async gradient aggregation wait time: 0.121515 +MPI Rank 1: Actual gradient aggregation time: 0.305048 +MPI Rank 1: Async gradient aggregation wait time: 0.17603 +MPI Rank 1: Actual gradient aggregation time: 0.244912 +MPI Rank 1: 05/03/2016 14:23:13: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04372754 * 9216; EvalErrorPrediction = 0.54318576 * 9216; time = 2.9792s; samplesPerSecond = 3093.5 +MPI Rank 1: Async gradient aggregation wait time: 0.105209 +MPI Rank 1: Actual gradient aggregation time: 0.248601 +MPI Rank 1: Async gradient aggregation wait time: 0.144591 +MPI Rank 1: Actual gradient aggregation time: 0.188328 +MPI Rank 1: 05/03/2016 14:23:16: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94811890 * 10240; EvalErrorPrediction = 0.52695313 * 10240; time = 3.0288s; samplesPerSecond = 3380.9 +MPI Rank 1: 05/03/2016 14:23:16: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98973403 * 20480; EvalErrorPrediction = 0.53388672 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.1402s +MPI Rank 1: +MPI Rank 1: 05/03/2016 14:23:17: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses +MPI Rank 1: +MPI Rank 1: 05/03/2016 14:23:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.098173 -MPI Rank 1: 05/03/2016 14:26:50: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94811890 * 10240; EvalErrorPrediction = 0.52695313 * 10240; time = 2.9794s; samplesPerSecond = 3436.9 -MPI Rank 1: 05/03/2016 14:26:51: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98973403 * 20480; EvalErrorPrediction = 0.53388672 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.10385s -MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:51: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:51: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 1: Actual gradient aggregation time: 0.116936 +MPI Rank 1: Async gradient aggregation wait time: 2e-006 +MPI Rank 1: Actual gradient aggregation time: 0.147516 +MPI Rank 1: 05/03/2016 14:23:20: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89718928 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 2.9850s; samplesPerSecond = 3087.4 MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.125067 -MPI Rank 1: Async gradient aggregation wait time: 0.063608 -MPI Rank 1: Actual gradient aggregation time: 0.304093 -MPI Rank 1: 05/03/2016 14:26:54: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89718928 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 2.8700s; samplesPerSecond = 3211.1 -MPI Rank 1: Async gradient aggregation wait time: 0.058226 -MPI Rank 1: Actual gradient aggregation time: 0.302381 -MPI Rank 1: Async gradient aggregation wait time: 0.056478 -MPI Rank 1: Actual gradient aggregation time: 0.312051 -MPI Rank 1: 05/03/2016 14:26:57: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.88234725 * 10240; EvalErrorPrediction = 0.51093750 * 10240; time = 3.0069s; samplesPerSecond = 3405.5 -MPI Rank 1: Async gradient aggregation wait time: 0.008202 -MPI Rank 1: 05/03/2016 14:26:57: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88941123 * 20480; EvalErrorPrediction = 0.51376953 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.10849s -MPI Rank 1: 05/03/2016 14:26:57: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: Actual gradient aggregation time: 0.210865 +MPI Rank 1: Async gradient aggregation wait time: 2e-006 +MPI Rank 1: Actual gradient aggregation time: 0.241098 +MPI Rank 1: 05/03/2016 14:23:22: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.88234725 * 10240; EvalErrorPrediction = 0.51093750 * 10240; time = 2.9377s; samplesPerSecond = 3485.7 +MPI Rank 1: Async gradient aggregation wait time: 0.011891 +MPI Rank 1: 05/03/2016 14:23:23: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88941123 * 20480; EvalErrorPrediction = 0.51376953 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.07995s +MPI Rank 1: 05/03/2016 14:23:23: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:57: Action "train" complete. +MPI Rank 1: 05/03/2016 14:23:23: Action "train" complete. 
MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:57: __COMPLETED__ -MPI Rank 2: 05/03/2016 14:26:14: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank2 -MPI Rank 2: 05/03/2016 14:26:14: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 14:26:14: Build info: +MPI Rank 1: 05/03/2016 14:23:23: __COMPLETED__ +MPI Rank 2: 05/03/2016 14:22:39: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank2 +MPI Rank 2: 05/03/2016 14:22:39: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:22:39: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: Built time: May 3 2016 13:23:06 -MPI Rank 2: 05/03/2016 14:26:14: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 2: 05/03/2016 14:26:14: Build type: Release -MPI Rank 2: 05/03/2016 14:26:14: Build target: GPU -MPI Rank 2: 05/03/2016 14:26:14: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 14:26:14: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 05/03/2016 14:26:14: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 2: 05/03/2016 14:26:14: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 05/03/2016 14:26:14: Build Branch: HEAD -MPI Rank 2: 05/03/2016 14:26:14: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 2: 05/03/2016 14:26:14: Built by svcphil on LIANA-09-w -MPI Rank 2: 05/03/2016 14:26:14: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 05/03/2016 14:26:14: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:22:39: Built time: May 3 2016 13:23:06 +MPI Rank 2: 05/03/2016 14:22:39: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 2: 05/03/2016 14:22:39: Build type: Release +MPI Rank 2: 05/03/2016 14:22:39: Build target: GPU +MPI Rank 2: 05/03/2016 14:22:39: With 1bit-SGD: no +MPI Rank 2: 05/03/2016 14:22:39: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 2: 05/03/2016 14:22:39: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 2: 05/03/2016 14:22:39: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 2: 05/03/2016 14:22:39: Build Branch: HEAD +MPI Rank 2: 05/03/2016 14:22:39: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 2: 05/03/2016 14:22:39: Built by svcphil on LIANA-09-w +MPI Rank 2: 05/03/2016 14:22:39: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 2: 05/03/2016 14:22:39: ------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: Running on cntk-muc02 at 2016/05/03 14:26:14 -MPI Rank 2: 05/03/2016 14:26:14: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: 05/03/2016 14:22:39: Running on cntk-muc02 at 2016/05/03 14:22:39 +MPI Rank 2: 05/03/2016 14:22:39: Command line: +MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 14:26:14: precision = "float" +MPI Rank 2: 05/03/2016 14:22:39: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:22:39: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1392,14 +1394,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=1 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -1407,18 +1407,18 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:22:39: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 14:26:14: precision = "float" +MPI Rank 2: 05/03/2016 14:22:39: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:22:39: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = -1 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1502,14 +1502,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: DeviceId=-1 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=1 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -1517,24 +1515,24 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:22:39: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:22:39: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:deviceId=-1 MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = 
"C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = -1 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1616,35 +1614,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 14:26:14: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 14:26:14: Commands: speechTrain -MPI Rank 2: 05/03/2016 14:26:14: Precision = "double" -MPI Rank 2: 05/03/2016 14:26:14: Using 1 CPU threads. -MPI Rank 2: 05/03/2016 14:26:14: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 14:26:14: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 2: 05/03/2016 14:26:14: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 2: 05/03/2016 14:22:39: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:22:39: Commands: speechTrain +MPI Rank 2: 05/03/2016 14:22:39: Precision = "double" +MPI Rank 2: 05/03/2016 14:22:39: Using 1 CPU threads. 
+MPI Rank 2: 05/03/2016 14:22:39: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 14:22:39: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 2: 05/03/2016 14:22:39: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: ############################################################################## -MPI Rank 2: 05/03/2016 14:26:14: # # -MPI Rank 2: 05/03/2016 14:26:14: # Action "train" # -MPI Rank 2: 05/03/2016 14:26:14: # # -MPI Rank 2: 05/03/2016 14:26:14: ############################################################################## +MPI Rank 2: 05/03/2016 14:22:39: ############################################################################## +MPI Rank 2: 05/03/2016 14:22:39: # # +MPI Rank 2: 05/03/2016 14:22:39: # Action "train" # +MPI Rank 2: 05/03/2016 14:22:39: # # +MPI Rank 2: 05/03/2016 14:22:39: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 14:22:39: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: Creating virgin network. +MPI Rank 2: 05/03/2016 14:22:40: Creating virgin network. MPI Rank 2: MPI Rank 2: Post-processing network... MPI Rank 2: @@ -1696,14 +1693,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: Created model with 25 nodes on CPU. +MPI Rank 2: 05/03/2016 14:22:40: Created model with 25 nodes on CPU. 
MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: Training criterion node(s): -MPI Rank 2: 05/03/2016 14:26:14: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 14:22:40: Training criterion node(s): +MPI Rank 2: 05/03/2016 14:22:40: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 14:22:40: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 14:22:40: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1711,160 +1708,166 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 0000005DF0853DB0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 0000005DF0853E50: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 0000005DF0853EF0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 0000005DF0854170: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 0000005DF0854210: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 0000005DF08543F0: {[B2 Value[132 x 1]] } -MPI Rank 2: 0000005DF0854490: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 0000005DF0854670: {[W0*features Value[512 x *]] } -MPI Rank 2: 0000005DF0854710: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 0000005DF0854B70: {[W2 Value[132 x 512]] } -MPI Rank 2: 0000005DF0854C10: {[Prior Value[132]] } -MPI Rank 2: 0000005DF0854D50: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 0000005DF0854E90: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 2: 0000005DF0854F30: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 0000005DF0855070: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 0000005DF0855250: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 0000005DF08552F0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 0000005DF08554D0: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 0000005DF0855570: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 0000005DF0855610: {[labels Value[132 x *]] } -MPI Rank 2: 0000005DF08556B0: {[LogOfPrior Value[132]] } -MPI Rank 2: 0000005DF087DDF0: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 0000005DF087DE90: {[B0 Value[512 x 1]] } -MPI Rank 2: 0000005DF087E250: {[B1 Value[512 x 1]] } -MPI Rank 2: 0000005DF087E2F0: {[features Value[363 x *]] } -MPI Rank 2: 0000005DF087E390: {[W1 Value[512 x 512]] } -MPI Rank 2: 0000005DF087EA70: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 0000005DF087EC50: {[W0 Value[512 x 363]] } +MPI Rank 2: 000000DCE2CDFF00: {[W0 Value[512 x 363]] } +MPI Rank 2: 000000DCE2CE0180: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 000000DCE2CE0400: {[features Value[363 x *]] } +MPI Rank 2: 000000DCE2CE04A0: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 000000DCE2CE0860: {[B0 Value[512 x 1]] } +MPI Rank 2: 000000DCEC0A31A0: {[W0*features 
Value[512 x *]] } +MPI Rank 2: 000000DCEC0A3240: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 000000DCEC0A32E0: {[B2 Value[132 x 1]] } +MPI Rank 2: 000000DCEC0A3380: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 000000DCEC0A3420: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 000000DCEC0A3560: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 000000DCEC0A3600: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 000000DCEC0A36A0: {[W1 Value[512 x 512]] } +MPI Rank 2: 000000DCEC0A3740: {[labels Value[132 x *]] } +MPI Rank 2: 000000DCEC0A37E0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 000000DCEC0A3B00: {[Prior Value[132]] } +MPI Rank 2: 000000DCEC0A3BA0: {[W2 Value[132 x 512]] } +MPI Rank 2: 000000DCEC0A3C40: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 000000DCEC0A3E20: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 000000DCEC0A3EC0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 000000DCEC0A3F60: {[B1 Value[512 x 1]] } +MPI Rank 2: 000000DCEC0A4280: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 000000DCEC0A4500: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 000000DCEC0A4640: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 000000DCEC0A46E0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 000000DCEC0A4A00: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 000000DCEC0A4FA0: {[LogOfPrior Value[132]] } +MPI Rank 2: 000000DCEC0FD0A0: {[B2 Gradient[132 x 1]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 14:22:40: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:14: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 14:26:14: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 14:26:14: Prior = Mean() +MPI Rank 2: 05/03/2016 14:22:40: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 14:22:40: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 14:22:40: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:16: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 14:22:41: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:16: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 14:22:42: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:16: Starting minibatch loop. 
-MPI Rank 2: 05/03/2016 14:26:17: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.8760s; samplesPerSecond = 730.6 -MPI Rank 2: 05/03/2016 14:26:17: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.7316s; samplesPerSecond = 874.8 -MPI Rank 2: 05/03/2016 14:26:18: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6643s; samplesPerSecond = 963.5 -MPI Rank 2: 05/03/2016 14:26:19: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6601s; samplesPerSecond = 969.6 -MPI Rank 2: 05/03/2016 14:26:19: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6662s; samplesPerSecond = 960.7 -MPI Rank 2: 05/03/2016 14:26:20: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.7075s; samplesPerSecond = 904.6 -MPI Rank 2: 05/03/2016 14:26:21: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.6662s; samplesPerSecond = 960.7 -MPI Rank 2: 05/03/2016 14:26:21: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.6849s; samplesPerSecond = 934.5 -MPI Rank 2: 05/03/2016 14:26:22: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.6402s; samplesPerSecond = 999.7 -MPI Rank 2: 05/03/2016 14:26:23: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.6942s; samplesPerSecond = 921.9 -MPI Rank 2: 05/03/2016 14:26:23: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.7056s; samplesPerSecond = 907.0 -MPI Rank 2: 05/03/2016 14:26:24: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.6761s; samplesPerSecond = 946.5 -MPI Rank 2: 05/03/2016 14:26:25: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.6646s; samplesPerSecond = 963.0 -MPI Rank 2: 05/03/2016 14:26:25: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.6902s; samplesPerSecond = 927.3 -MPI Rank 2: 05/03/2016 14:26:26: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.6621s; samplesPerSecond = 966.7 -MPI Rank 2: 05/03/2016 14:26:27: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.6392s; samplesPerSecond = 1001.2 -MPI Rank 2: 05/03/2016 14:26:27: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.7047s; samplesPerSecond = 908.2 -MPI Rank 2: 05/03/2016 14:26:28: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.7380s; samplesPerSecond = 867.3 -MPI Rank 2: 05/03/2016 14:26:29: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.7440s; samplesPerSecond = 860.3 -MPI Rank 2: 05/03/2016 14:26:30: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.7102s; samplesPerSecond = 901.2 -MPI Rank 2: 05/03/2016 14:26:30: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.7532s; samplesPerSecond = 849.7 -MPI Rank 2: 05/03/2016 14:26:31: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6618s; samplesPerSecond = 967.1 -MPI Rank 2: 05/03/2016 14:26:32: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.7536s; samplesPerSecond = 849.3 -MPI Rank 2: 05/03/2016 14:26:33: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.7546s; samplesPerSecond = 848.1 -MPI Rank 2: 05/03/2016 14:26:33: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.6667s; samplesPerSecond = 959.9 -MPI Rank 2: 05/03/2016 14:26:34: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.6855s; samplesPerSecond = 933.7 -MPI Rank 2: 05/03/2016 14:26:35: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.6441s; samplesPerSecond = 993.6 -MPI Rank 2: 05/03/2016 14:26:35: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.7265s; samplesPerSecond = 881.0 -MPI Rank 2: 05/03/2016 14:26:36: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.7340s; samplesPerSecond = 871.9 -MPI Rank 2: 05/03/2016 14:26:37: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6354s; samplesPerSecond = 1007.2 -MPI Rank 2: 05/03/2016 14:26:37: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.2971s; samplesPerSecond = 2154.1 -MPI Rank 2: 05/03/2016 14:26:37: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.2187s; samplesPerSecond = 2926.7 -MPI Rank 2: 05/03/2016 14:26:37: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=21.5404s +MPI Rank 2: 05/03/2016 14:22:42: Starting minibatch loop. 
+MPI Rank 2: 05/03/2016 14:22:42: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944908 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.6735s; samplesPerSecond = 950.3 +MPI Rank 2: 05/03/2016 14:22:43: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22299987 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.7452s; samplesPerSecond = 858.8 +MPI Rank 2: 05/03/2016 14:22:44: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971343 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.6696s; samplesPerSecond = 955.8 +MPI Rank 2: 05/03/2016 14:22:44: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341692 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.6934s; samplesPerSecond = 923.0 +MPI Rank 2: 05/03/2016 14:22:45: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074483 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.6915s; samplesPerSecond = 925.5 +MPI Rank 2: 05/03/2016 14:22:46: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71252184 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.6949s; samplesPerSecond = 921.0 +MPI Rank 2: 05/03/2016 14:22:46: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563464 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.5974s; samplesPerSecond = 1071.3 +MPI Rank 2: 05/03/2016 14:22:47: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49349060 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.7608s; samplesPerSecond = 841.2 +MPI Rank 2: 05/03/2016 14:22:48: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34740070 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.6690s; samplesPerSecond = 956.7 +MPI Rank 2: 05/03/2016 14:22:48: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51960918 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.6919s; samplesPerSecond = 925.0 +MPI Rank 2: 05/03/2016 14:22:49: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656049 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.6267s; samplesPerSecond = 1021.2 +MPI Rank 2: 05/03/2016 14:22:50: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397669 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.6934s; samplesPerSecond = 923.0 +MPI Rank 2: 05/03/2016 14:22:50: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780980 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.7094s; samplesPerSecond = 902.1 +MPI Rank 2: 05/03/2016 14:22:51: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845902 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.6687s; samplesPerSecond = 957.0 +MPI Rank 2: 05/03/2016 14:22:52: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06458212 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.6888s; samplesPerSecond = 929.1 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91633510 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.7806s; samplesPerSecond = 819.9 +MPI Rank 2: 05/03/2016 14:22:53: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90607468 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.6556s; samplesPerSecond = 976.3 +MPI Rank 2: 05/03/2016 14:22:54: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 
2.74095059 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.6661s; samplesPerSecond = 960.8 +MPI Rank 2: 05/03/2016 14:22:55: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.67087924 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6678s; samplesPerSecond = 958.4 +MPI Rank 2: 05/03/2016 14:22:55: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67609083 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.7588s; samplesPerSecond = 843.5 +MPI Rank 2: 05/03/2016 14:22:56: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54732903 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.6953s; samplesPerSecond = 920.4 +MPI Rank 2: 05/03/2016 14:22:57: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925710 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.6679s; samplesPerSecond = 958.2 +MPI Rank 2: 05/03/2016 14:22:57: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52388480 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.6692s; samplesPerSecond = 956.3 +MPI Rank 2: 05/03/2016 14:22:58: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47544601 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.6296s; samplesPerSecond = 1016.6 +MPI Rank 2: 05/03/2016 14:22:59: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265158 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.6552s; samplesPerSecond = 976.7 +MPI Rank 2: 05/03/2016 14:22:59: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728740 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.7139s; samplesPerSecond = 896.5 +MPI Rank 2: 05/03/2016 14:23:00: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674793 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.6348s; samplesPerSecond = 1008.2 +MPI Rank 2: 05/03/2016 14:23:01: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020940 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.6235s; samplesPerSecond = 1026.5 +MPI Rank 2: 05/03/2016 14:23:01: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400612 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.7639s; samplesPerSecond = 837.8 +MPI Rank 2: 05/03/2016 14:23:02: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885172 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.6705s; samplesPerSecond = 954.5 +MPI Rank 2: 05/03/2016 14:23:03: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22712855 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.6866s; samplesPerSecond = 932.1 +MPI Rank 2: 05/03/2016 14:23:03: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604782 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.3045s; samplesPerSecond = 2101.6 +MPI Rank 2: 05/03/2016 14:23:03: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00704835 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=21.5297s MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 14:23:03: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 
minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:37: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.074302 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.066736 -MPI Rank 2: 05/03/2016 14:26:38: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13223658 * 2304; EvalErrorPrediction = 0.57725694 * 2304; time = 0.8155s; samplesPerSecond = 2825.3 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.064763 +MPI Rank 2: 05/03/2016 14:23:03: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Actual gradient aggregation time: 0.031018 MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.032411 -MPI Rank 2: 05/03/2016 14:26:39: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.12726818 * 2560; EvalErrorPrediction = 0.59375000 * 2560; time = 0.9132s; samplesPerSecond = 2803.4 +MPI Rank 2: Actual gradient aggregation time: 0.039604 +MPI Rank 2: 05/03/2016 14:23:04: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13223658 * 2304; EvalErrorPrediction = 0.57725694 * 2304; time = 0.8418s; samplesPerSecond = 2736.9 +MPI Rank 2: Async gradient aggregation wait time: 0.043953 +MPI Rank 2: Actual gradient aggregation time: 0.070193 +MPI Rank 2: Async gradient aggregation wait time: 0.042393 +MPI Rank 2: Actual gradient aggregation time: 0.066855 +MPI Rank 2: 05/03/2016 14:23:05: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.12726818 * 2560; EvalErrorPrediction = 0.59375000 * 2560; time = 0.8566s; samplesPerSecond = 2988.6 +MPI Rank 2: Async gradient aggregation wait time: 0.042275 +MPI Rank 2: Actual gradient aggregation time: 0.089017 +MPI Rank 2: Async gradient aggregation wait time: 0.026615 +MPI Rank 2: Actual gradient aggregation time: 0.093695 +MPI Rank 2: 05/03/2016 14:23:06: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18923682 * 2560; EvalErrorPrediction = 0.58437500 * 2560; time = 0.8878s; samplesPerSecond = 2883.7 +MPI Rank 2: Async gradient aggregation wait time: 0.025158 +MPI Rank 2: Actual gradient aggregation time: 0.093798 +MPI Rank 2: Async gradient aggregation wait time: 0.043512 +MPI Rank 2: Actual gradient aggregation time: 0.085555 +MPI Rank 2: 05/03/2016 14:23:07: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.11532284 * 2560; EvalErrorPrediction = 0.57968750 * 2560; time = 0.8737s; samplesPerSecond = 2930.2 +MPI Rank 2: Async gradient aggregation wait time: 0.025367 +MPI Rank 2: Actual gradient aggregation time: 0.095238 +MPI Rank 2: Async gradient aggregation wait time: 0.02648 +MPI Rank 2: Actual gradient aggregation time: 0.08701 +MPI Rank 2: 05/03/2016 14:23:08: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01746278 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.8675s; samplesPerSecond = 2950.9 +MPI Rank 2: Async gradient aggregation wait time: 0.045444 +MPI Rank 2: Actual gradient aggregation time: 0.079243 MPI Rank 2: 
Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.034905 +MPI Rank 2: Actual gradient aggregation time: 0.024135 +MPI Rank 2: 05/03/2016 14:23:08: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06105301 * 2560; EvalErrorPrediction = 0.57617188 * 2560; time = 0.9048s; samplesPerSecond = 2829.2 MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.00745 -MPI Rank 2: 05/03/2016 14:26:40: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18923682 * 2560; EvalErrorPrediction = 0.58437500 * 2560; time = 0.9350s; samplesPerSecond = 2737.9 +MPI Rank 2: Actual gradient aggregation time: 0.00458 MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.005989 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.006456 -MPI Rank 2: 05/03/2016 14:26:41: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.11532284 * 2560; EvalErrorPrediction = 0.57968750 * 2560; time = 0.8612s; samplesPerSecond = 2972.6 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.036517 +MPI Rank 2: Actual gradient aggregation time: 0.004497 +MPI Rank 2: 05/03/2016 14:23:09: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.12283592 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.8965s; samplesPerSecond = 2855.4 MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.036166 -MPI Rank 2: 05/03/2016 14:26:42: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01746278 * 2560; EvalErrorPrediction = 0.55625000 * 2560; time = 0.8744s; samplesPerSecond = 2927.8 +MPI Rank 2: Actual gradient aggregation time: 0.019382 MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.05573 -MPI Rank 2: Async gradient aggregation wait time: 0.043829 -MPI Rank 2: Actual gradient aggregation time: 0.088924 -MPI Rank 2: 05/03/2016 14:26:43: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.06105301 * 2560; EvalErrorPrediction = 0.57617188 * 2560; time = 0.8575s; samplesPerSecond = 2985.5 -MPI Rank 2: Async gradient aggregation wait time: 0.025867 -MPI Rank 2: Actual gradient aggregation time: 0.090221 -MPI Rank 2: Async gradient aggregation wait time: 0.041672 -MPI Rank 2: Actual gradient aggregation time: 0.086182 -MPI Rank 2: 05/03/2016 14:26:43: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.12283592 * 2560; EvalErrorPrediction = 0.56054688 * 2560; time = 0.8935s; samplesPerSecond = 2865.3 -MPI Rank 2: Async gradient aggregation wait time: 0.040428 -MPI Rank 2: Actual gradient aggregation time: 0.08919 -MPI Rank 2: Async gradient aggregation wait time: 0.041937 -MPI Rank 2: Actual gradient aggregation time: 0.088409 -MPI Rank 2: 05/03/2016 14:26:44: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06473750 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.8865s; samplesPerSecond = 2887.7 -MPI Rank 2: Async gradient aggregation wait time: 0.043539 -MPI Rank 2: Actual gradient aggregation time: 0.0123 -MPI Rank 2: 05/03/2016 14:26:44: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10367019 * 20480; EvalErrorPrediction = 0.57451172 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 
0.001953125; epochTime=7.11719s +MPI Rank 2: Actual gradient aggregation time: 0.004502 +MPI Rank 2: 05/03/2016 14:23:10: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06473750 * 2560; EvalErrorPrediction = 0.56953125 * 2560; time = 0.8344s; samplesPerSecond = 3068.2 +MPI Rank 2: Async gradient aggregation wait time: 0.023162 +MPI Rank 2: Actual gradient aggregation time: 0.012226 +MPI Rank 2: 05/03/2016 14:23:10: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.10367019 * 20480; EvalErrorPrediction = 0.57451172 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=7.00256s MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:45: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:23:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:45: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 05/03/2016 14:23:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.006825 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.017254 -MPI Rank 2: 05/03/2016 14:26:48: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04372754 * 9216; EvalErrorPrediction = 0.54318576 * 9216; time = 3.0372s; samplesPerSecond = 3034.4 +MPI Rank 2: Actual gradient aggregation time: 0.007524 MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.012134 -MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.005987 -MPI Rank 2: 05/03/2016 14:26:51: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94811890 * 10240; EvalErrorPrediction = 0.52695313 * 10240; time = 2.9308s; samplesPerSecond = 3493.9 -MPI Rank 2: 05/03/2016 14:26:51: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98973403 * 20480; EvalErrorPrediction = 0.53388672 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.04686s -MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:51: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:51: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.02351 -MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.008606 -MPI Rank 2: 05/03/2016 14:26:54: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89718928 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 3.0974s; samplesPerSecond = 2975.4 +MPI Rank 2: Actual gradient aggregation time: 0.017001 +MPI Rank 2: 05/03/2016 14:23:14: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04372754 * 9216; EvalErrorPrediction = 0.54318576 * 9216; time = 3.2131s; samplesPerSecond = 2868.3 MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.016985 +MPI Rank 2: Actual gradient aggregation time: 0.00724 +MPI Rank 2: Async gradient aggregation wait time: 3e-006 +MPI Rank 2: Actual gradient aggregation time: 0.044002 +MPI Rank 2: 05/03/2016 14:23:16: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94811890 * 10240; EvalErrorPrediction = 0.52695313 * 10240; time = 2.8539s; samplesPerSecond = 3588.1 +MPI Rank 2: 05/03/2016 14:23:16: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98973403 * 20480; EvalErrorPrediction = 0.53388672 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=6.0877s +MPI Rank 2: +MPI Rank 2: 05/03/2016 14:23:17: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses +MPI Rank 2: +MPI Rank 2: 05/03/2016 14:23:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.008031 -MPI Rank 2: 05/03/2016 14:26:57: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.88234725 * 10240; EvalErrorPrediction = 0.51093750 * 10240; time = 2.8883s; samplesPerSecond = 3545.3 -MPI Rank 2: Async gradient aggregation wait time: 0.011495 -MPI Rank 2: 05/03/2016 14:26:57: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88941123 * 20480; EvalErrorPrediction = 0.51376953 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.05467s -MPI Rank 2: 05/03/2016 14:26:57: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: Actual gradient aggregation time: 0.015821 +MPI Rank 2: Async gradient aggregation wait time: 2e-006 +MPI Rank 2: Actual gradient aggregation time: 0.040683 +MPI Rank 2: 05/03/2016 14:23:20: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89718928 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 3.0434s; samplesPerSecond = 3028.2 +MPI Rank 2: Async gradient aggregation wait time: 3e-006 +MPI Rank 2: Actual gradient aggregation time: 0.010109 +MPI Rank 2: Async gradient aggregation wait time: 2e-006 +MPI Rank 2: Actual gradient aggregation time: 0.040104 +MPI Rank 2: 05/03/2016 14:23:23: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.88234725 * 10240; EvalErrorPrediction = 0.51093750 * 10240; time = 2.9567s; samplesPerSecond = 3463.3 +MPI Rank 2: Async gradient aggregation wait time: 0.01776 +MPI Rank 2: 05/03/2016 14:23:23: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88941123 * 20480; EvalErrorPrediction = 0.51376953 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=6.03215s +MPI Rank 2: 05/03/2016 14:23:23: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:57: Action "train" complete. +MPI Rank 2: 05/03/2016 14:23:23: Action "train" complete. 
MPI Rank 2: -MPI Rank 2: 05/03/2016 14:26:57: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 14:23:23: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt index b944672cf..d0468e3ab 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt @@ -1,4 +1,4 @@ -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] 
stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr ------------------------------------------------------------------- Build info: @@ -61,18 +61,18 @@ requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (before change)]: all 3 nodes responded ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) +requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: 3 nodes pinging each other ping [requestnodes (after change)]: all 3 nodes responded -ping [requestnodes (after change)]: all 3 nodes responded -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 0 in a gearbox of 3 -mpihelper: we are cog 2 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: 3 nodes pinging each other mpihelper: we are cog 1 in a gearbox of 3 +ping [requestnodes (after change)]: all 3 nodes responded +ping [mpihelper]: 3 nodes pinging each other +mpihelper: we are cog 0 in a gearbox of 3 +ping [requestnodes (after change)]: all 3 nodes responded +ping [mpihelper]: 3 nodes pinging each other +mpihelper: we are cog 2 in a gearbox of 3 ping [mpihelper]: 3 nodes pinging each other ping [mpihelper]: all 3 nodes responded ping [mpihelper]: all 3 nodes responded @@ -81,40 +81,45 @@ ping [mpihelper]: all 3 nodes responded job aborted: [ranks] message -[0-2] process exited without calling finalize +[0] process exited without calling finalize + +[1-2] process exited without calling finalize ---- error analysis ----- -[0-2] on CNTK-MUC02 +[0] on CNTK-MUC02 +C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe ended prematurely and may have crashed. exit code -1 + +[1-2] on CNTK-MUC02 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe ended prematurely and may have crashed. 
exit code 0 ---- error analysis ----- -MPI Rank 0: 05/03/2016 14:26:59: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank0 -MPI Rank 0: 05/03/2016 14:26:59: ------------------------------------------------------------------- -MPI Rank 0: 05/03/2016 14:26:59: Build info: +MPI Rank 0: 05/03/2016 14:23:25: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank0 +MPI Rank 0: 05/03/2016 14:23:25: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:23:25: Build info: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:59: Built time: May 3 2016 13:23:06 -MPI Rank 0: 05/03/2016 14:26:59: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 0: 05/03/2016 14:26:59: Build type: Release -MPI Rank 0: 05/03/2016 14:26:59: Build target: GPU -MPI Rank 0: 05/03/2016 14:26:59: With 1bit-SGD: no -MPI Rank 0: 05/03/2016 14:26:59: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 05/03/2016 14:26:59: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 0: 05/03/2016 14:26:59: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 05/03/2016 14:26:59: Build Branch: HEAD -MPI Rank 0: 05/03/2016 14:26:59: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 0: 05/03/2016 14:26:59: Built by svcphil on LIANA-09-w -MPI Rank 0: 05/03/2016 14:26:59: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 05/03/2016 14:26:59: ------------------------------------------------------------------- +MPI Rank 0: 05/03/2016 14:23:25: Built time: May 3 2016 13:23:06 +MPI Rank 0: 05/03/2016 14:23:25: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 0: 05/03/2016 14:23:25: Build type: Release +MPI Rank 0: 05/03/2016 14:23:25: Build target: GPU +MPI Rank 0: 05/03/2016 14:23:25: With 1bit-SGD: no +MPI Rank 0: 05/03/2016 14:23:25: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 0: 05/03/2016 14:23:25: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 0: 05/03/2016 14:23:25: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 0: 05/03/2016 14:23:25: Build Branch: HEAD +MPI Rank 0: 05/03/2016 14:23:25: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 0: 05/03/2016 14:23:25: Built by svcphil on LIANA-09-w +MPI Rank 0: 05/03/2016 14:23:25: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 0: 05/03/2016 14:23:25: ------------------------------------------------------------------- MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:59: Running on cntk-muc02 at 2016/05/03 14:26:59 -MPI Rank 0: 05/03/2016 14:26:59: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: 05/03/2016 14:23:25: Running on cntk-muc02 at 2016/05/03 14:23:25 +MPI Rank 0: 05/03/2016 14:23:25: Command line: +MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:59: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 14:26:59: precision = "float" +MPI Rank 0: 05/03/2016 14:23:25: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:23:25: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = $DeviceId$ MPI Rank 0: parallelTrain = true @@ -204,14 +209,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=1 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -219,18 +222,18 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:59: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:23:25: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:59: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 05/03/2016 14:26:59: precision = "float" +MPI Rank 0: 05/03/2016 14:23:25: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:23:25: precision = "float" MPI Rank 0: command = speechTrain MPI Rank 0: deviceId = 0 MPI Rank 0: parallelTrain = true MPI Rank 0: speechTrain = [ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -314,14 +317,12 @@ MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: ] MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: DeviceId=0 MPI Rank 0: timestamping=true -MPI Rank 0: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 0: speechTrain=[reader=[prefetch=true]] MPI Rank 0: numCPUThreads=1 MPI Rank 0: precision=double MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -329,24 +330,24 @@ MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:59: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:23:25: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:59: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 0: 05/03/2016 14:23:25: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 0: configparameters: cntk.cntk:command=speechTrain MPI Rank 0: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 0: configparameters: cntk.cntk:deviceId=0 MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 0: modelPath = 
"C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 0: deviceId = 0 MPI Rank 0: traceLevel = 1 MPI Rank 0: SimpleNetworkBuilder = [ @@ -428,35 +429,34 @@ MPI Rank 0: labelDim = 132 MPI Rank 0: labelType = "category" MPI Rank 0: ] MPI Rank 0: ] -MPI Rank 0: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 05/03/2016 14:26:59: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 05/03/2016 14:26:59: Commands: speechTrain -MPI Rank 0: 05/03/2016 14:26:59: Precision = "double" -MPI Rank 0: 05/03/2016 14:26:59: Using 1 CPU threads. -MPI Rank 0: 05/03/2016 14:26:59: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 05/03/2016 14:26:59: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 0: 05/03/2016 14:26:59: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 0: 05/03/2016 14:23:25: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 0: 05/03/2016 14:23:25: Commands: speechTrain +MPI Rank 0: 05/03/2016 14:23:25: Precision = "double" +MPI Rank 0: 05/03/2016 14:23:25: Using 1 CPU threads. 
+MPI Rank 0: 05/03/2016 14:23:25: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn +MPI Rank 0: 05/03/2016 14:23:25: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 0: 05/03/2016 14:23:25: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:59: ############################################################################## -MPI Rank 0: 05/03/2016 14:26:59: # # -MPI Rank 0: 05/03/2016 14:26:59: # Action "train" # -MPI Rank 0: 05/03/2016 14:26:59: # # -MPI Rank 0: 05/03/2016 14:26:59: ############################################################################## +MPI Rank 0: 05/03/2016 14:23:25: ############################################################################## +MPI Rank 0: 05/03/2016 14:23:25: # # +MPI Rank 0: 05/03/2016 14:23:25: # Action "train" # +MPI Rank 0: 05/03/2016 14:23:25: # # +MPI Rank 0: 05/03/2016 14:23:25: ############################################################################## MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:59: CNTKCommandTrainBegin: speechTrain +MPI Rank 0: 05/03/2016 14:23:25: CNTKCommandTrainBegin: speechTrain MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: Reading script file glob_0000.scp ... 948 entries -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 0: reading script file glob_0000.scp ... 948 entries MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 0: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 0: label set 0: 129 classes +MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 0: -MPI Rank 0: 05/03/2016 14:26:59: Creating virgin network. +MPI Rank 0: 05/03/2016 14:23:25: Creating virgin network. MPI Rank 0: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 0: MPI Rank 0: Post-processing network... @@ -509,14 +509,14 @@ MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 0: MPI Rank 0: Post-processing network complete. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:00: Created model with 25 nodes on GPU 0. +MPI Rank 0: 05/03/2016 14:23:26: Created model with 25 nodes on GPU 0. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:00: Training criterion node(s): -MPI Rank 0: 05/03/2016 14:27:00: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 0: 05/03/2016 14:23:26: Training criterion node(s): +MPI Rank 0: 05/03/2016 14:23:26: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:00: Evaluation criterion node(s): +MPI Rank 0: 05/03/2016 14:23:26: Evaluation criterion node(s): MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:00: EvalErrorPrediction = ErrorPrediction +MPI Rank 0: 05/03/2016 14:23:26: EvalErrorPrediction = ErrorPrediction MPI Rank 0: MPI Rank 0: MPI Rank 0: Allocating matrices for forward and/or backward propagation. @@ -524,193 +524,199 @@ MPI Rank 0: MPI Rank 0: Memory Sharing Structure: MPI Rank 0: MPI Rank 0: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 0: 000000A15793DD60: {[features Value[363 x *]] } -MPI Rank 0: 000000A17961E640: {[B0 Value[512 x 1]] } -MPI Rank 0: 000000A17961E780: {[B1 Value[512 x 1]] } -MPI Rank 0: 000000A17961E960: {[W1 Value[512 x 512]] } -MPI Rank 0: 000000A17961EA00: {[W2 Value[132 x 512]] } -MPI Rank 0: 000000A17961F040: {[MeanOfFeatures Value[363]] } -MPI Rank 0: 000000A17961F860: {[B2 Value[132 x 1]] } -MPI Rank 0: 000000A17961FA40: {[InvStdOfFeatures Value[363]] } -MPI Rank 0: 000000A17961FD60: {[labels Value[132 x *]] } -MPI Rank 0: 000000A179620300: {[W0 Value[512 x 363]] } -MPI Rank 0: 000000A179E6A680: {[EvalErrorPrediction Value[1]] } -MPI Rank 0: 000000A179E6A720: {[Prior Value[132]] } -MPI Rank 0: 000000A179E6A7C0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 0: 000000A179E6A9A0: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 0: 000000A179E6AA40: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 0: 000000A179E6AB80: {[B2 Gradient[132 x 1]] } -MPI Rank 0: 000000A179E6AC20: {[LogOfPrior Value[132]] } -MPI Rank 0: 000000A179E6AE00: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 0: 000000A179E6B120: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 0: 000000A179E6B1C0: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 0: 000000A179E6B300: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 0: 000000A179E6B3A0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 0: 000000A179E6B620: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 0: 000000A179E6B8A0: {[W0*features Value[512 x *]] } -MPI Rank 0: 000000A179E6B940: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 0: 000000A179E6BA80: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 0: 000000A179E6BB20: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 0: 000000A179E6BBC0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 0: 000000DB868FD560: {[features Value[363 x *]] } +MPI Rank 0: 000000DBA50ABA80: {[W2 Value[132 x 512]] } +MPI Rank 0: 000000DBA50ABB20: {[B2 Value[132 x 1]] } +MPI Rank 0: 000000DBA50AC200: {[W1 Value[512 x 512]] } +MPI Rank 0: 000000DBA50AC3E0: {[MeanOfFeatures Value[363]] } +MPI Rank 0: 000000DBA50ACAC0: {[InvStdOfFeatures Value[363]] 
} +MPI Rank 0: 000000DBA50ACF20: {[W0 Value[512 x 363]] } +MPI Rank 0: 000000DBA50AD420: {[B0 Value[512 x 1]] } +MPI Rank 0: 000000DBA50AD7E0: {[B1 Value[512 x 1]] } +MPI Rank 0: 000000DBA767E490: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 0: 000000DBA767E670: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 0: 000000DBA767E7B0: {[EvalErrorPrediction Value[1]] } +MPI Rank 0: 000000DBA767E990: {[Prior Value[132]] } +MPI Rank 0: 000000DBA767EA30: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 0: 000000DBA767EAD0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 0: 000000DBA767EB70: {[B2 Gradient[132 x 1]] } +MPI Rank 0: 000000DBA767EE90: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 0: 000000DBA767EFD0: {[labels Value[132 x *]] } +MPI Rank 0: 000000DBA767F2F0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 0: 000000DBA767F4D0: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 0: 000000DBA767F610: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 0: 000000DBA767F930: {[LogOfPrior Value[132]] } +MPI Rank 0: 000000DBA767F9D0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 0: 000000DBA767FB10: {[W0*features Value[512 x *]] } +MPI Rank 0: 000000DBA767FBB0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 0: 000000DBA767FE30: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 0: 000000DBA767FED0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 0: 000000DBA7680150: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:00: Precomputing --> 3 PreCompute nodes found. +MPI Rank 0: 05/03/2016 14:23:26: Precomputing --> 3 PreCompute nodes found. MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:00: MeanOfFeatures = Mean() -MPI Rank 0: 05/03/2016 14:27:00: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 05/03/2016 14:27:00: Prior = Mean() +MPI Rank 0: 05/03/2016 14:23:26: MeanOfFeatures = Mean() +MPI Rank 0: 05/03/2016 14:23:26: InvStdOfFeatures = InvStdDev() +MPI Rank 0: 05/03/2016 14:23:26: Prior = Mean() +MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:04: Precomputing --> Completed. +MPI Rank 0: 05/03/2016 14:23:30: Precomputing --> Completed. MPI Rank 0: MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:05: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: 05/03/2016 14:23:31: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:05: Starting minibatch loop. 
-MPI Rank 0: 05/03/2016 14:27:05: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.4461s; samplesPerSecond = 1434.5 -MPI Rank 0: 05/03/2016 14:27:06: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3238s; samplesPerSecond = 1976.3 -MPI Rank 0: 05/03/2016 14:27:06: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3343s; samplesPerSecond = 1914.7 -MPI Rank 0: 05/03/2016 14:27:06: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3137s; samplesPerSecond = 2040.2 -MPI Rank 0: 05/03/2016 14:27:07: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3134s; samplesPerSecond = 2042.4 -MPI Rank 0: 05/03/2016 14:27:07: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3135s; samplesPerSecond = 2041.7 -MPI Rank 0: 05/03/2016 14:27:07: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3134s; samplesPerSecond = 2042.4 -MPI Rank 0: 05/03/2016 14:27:08: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3129s; samplesPerSecond = 2045.6 -MPI Rank 0: 05/03/2016 14:27:08: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3127s; samplesPerSecond = 2046.8 -MPI Rank 0: 05/03/2016 14:27:08: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3131s; samplesPerSecond = 2044.0 -MPI Rank 0: 05/03/2016 14:27:09: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3130s; samplesPerSecond = 2044.6 -MPI Rank 0: 05/03/2016 14:27:09: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3126s; samplesPerSecond = 2047.3 -MPI Rank 0: 05/03/2016 14:27:09: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3134s; samplesPerSecond = 2042.2 -MPI Rank 0: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3138s; samplesPerSecond = 2039.4 -MPI Rank 0: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3142s; samplesPerSecond = 2037.2 -MPI Rank 0: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3129s; samplesPerSecond = 2045.5 -MPI Rank 0: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3138s; samplesPerSecond = 2039.4 -MPI Rank 0: 05/03/2016 14:27:11: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3134s; samplesPerSecond = 2042.2 -MPI Rank 0: 05/03/2016 14:27:11: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3234s; samplesPerSecond = 1979.2 -MPI Rank 0: 05/03/2016 14:27:11: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3131s; samplesPerSecond = 2044.1 -MPI Rank 0: 05/03/2016 14:27:12: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3028s; samplesPerSecond = 2113.6 -MPI Rank 0: 05/03/2016 14:27:12: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3136s; samplesPerSecond = 2040.6 -MPI Rank 0: 05/03/2016 14:27:12: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3129s; samplesPerSecond = 2045.6 -MPI Rank 0: 05/03/2016 14:27:13: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3132s; samplesPerSecond = 2043.4 -MPI Rank 0: 05/03/2016 14:27:13: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3137s; samplesPerSecond = 2040.3 -MPI Rank 0: 05/03/2016 14:27:13: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3127s; samplesPerSecond = 2047.0 -MPI Rank 0: 05/03/2016 14:27:14: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3135s; samplesPerSecond = 2041.3 -MPI Rank 0: 05/03/2016 14:27:14: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3138s; samplesPerSecond = 2039.5 -MPI Rank 0: 05/03/2016 14:27:14: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3142s; samplesPerSecond = 2036.9 -MPI Rank 0: 05/03/2016 14:27:15: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3132s; samplesPerSecond = 2043.6 -MPI Rank 0: 05/03/2016 14:27:15: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3131s; samplesPerSecond = 2043.8 -MPI Rank 0: 05/03/2016 14:27:15: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.2821s; samplesPerSecond = 2268.4 -MPI Rank 0: 05/03/2016 14:27:15: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.2437s -MPI Rank 0: 05/03/2016 14:27:15: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.1' +MPI Rank 0: 05/03/2016 14:23:31: Starting minibatch loop. 
+MPI Rank 0: 05/03/2016 14:23:32: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3080s; samplesPerSecond = 2077.7 +MPI Rank 0: 05/03/2016 14:23:32: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3127s; samplesPerSecond = 2046.8 +MPI Rank 0: 05/03/2016 14:23:32: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3139s; samplesPerSecond = 2039.1 +MPI Rank 0: 05/03/2016 14:23:33: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3138s; samplesPerSecond = 2039.7 +MPI Rank 0: 05/03/2016 14:23:33: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3134s; samplesPerSecond = 2041.9 +MPI Rank 0: 05/03/2016 14:23:33: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3122s; samplesPerSecond = 2049.7 +MPI Rank 0: 05/03/2016 14:23:34: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3181s; samplesPerSecond = 2011.7 +MPI Rank 0: 05/03/2016 14:23:34: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3311s; samplesPerSecond = 1932.8 +MPI Rank 0: 05/03/2016 14:23:34: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.2179s; samplesPerSecond = 2937.3 +MPI Rank 0: 05/03/2016 14:23:34: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3237s; samplesPerSecond = 1977.2 +MPI Rank 0: 05/03/2016 14:23:35: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3132s; samplesPerSecond = 2043.2 +MPI Rank 0: 05/03/2016 14:23:35: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3127s; samplesPerSecond = 2046.6 +MPI Rank 0: 05/03/2016 14:23:35: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3235s; samplesPerSecond = 1978.5 +MPI Rank 0: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3128s; samplesPerSecond = 2046.1 +MPI Rank 0: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3223s; samplesPerSecond = 1985.7 +MPI Rank 0: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3127s; samplesPerSecond = 2047.0 +MPI Rank 0: 05/03/2016 14:23:37: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3143s; samplesPerSecond = 2036.5 +MPI Rank 0: 05/03/2016 14:23:37: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3134s; samplesPerSecond = 2042.1 +MPI Rank 0: 05/03/2016 14:23:37: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3128s; samplesPerSecond = 2046.0 +MPI Rank 0: 05/03/2016 14:23:38: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3127s; samplesPerSecond = 2046.7 +MPI Rank 0: 05/03/2016 14:23:38: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3122s; samplesPerSecond = 2049.8 +MPI Rank 0: 05/03/2016 14:23:38: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3135s; samplesPerSecond = 2041.3 +MPI Rank 0: 05/03/2016 14:23:39: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3134s; samplesPerSecond = 2042.2 +MPI Rank 0: 05/03/2016 14:23:39: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3131s; samplesPerSecond = 2043.8 +MPI Rank 0: 05/03/2016 14:23:39: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3129s; samplesPerSecond = 2045.6 +MPI Rank 0: 05/03/2016 14:23:40: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3136s; samplesPerSecond = 2041.0 +MPI Rank 0: 05/03/2016 14:23:40: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3127s; samplesPerSecond = 2046.8 +MPI Rank 0: 05/03/2016 14:23:40: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3126s; samplesPerSecond = 2047.5 +MPI Rank 0: 05/03/2016 14:23:40: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3241s; samplesPerSecond = 1974.6 +MPI Rank 0: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3443s; samplesPerSecond = 1858.8 +MPI Rank 0: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3129s; samplesPerSecond = 2045.5 +MPI Rank 0: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3130s; samplesPerSecond = 2044.7 +MPI Rank 0: 05/03/2016 14:23:41: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.0257s +MPI Rank 0: 05/03/2016 14:23:41: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.1' MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:15: Starting Epoch 2: learning rate per 
sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: 05/03/2016 14:23:42: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.033248 +MPI Rank 0: 05/03/2016 14:23:42: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Actual gradient aggregation time: 0.042635 MPI Rank 0: Async gradient aggregation wait time: 1e-006 -MPI Rank 0: Actual gradient aggregation time: 0.023153 -MPI Rank 0: 05/03/2016 14:27:16: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.12914686 * 2304; EvalErrorPrediction = 0.57855903 * 2304; time = 0.4762s; samplesPerSecond = 4838.3 -MPI Rank 0: Async gradient aggregation wait time: 0.011236 -MPI Rank 0: Actual gradient aggregation time: 0.01118 -MPI Rank 0: Async gradient aggregation wait time: 0.015175 -MPI Rank 0: Actual gradient aggregation time: 0.049141 -MPI Rank 0: 05/03/2016 14:27:16: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11816271 * 2560; EvalErrorPrediction = 0.59179688 * 2560; time = 0.4966s; samplesPerSecond = 5154.9 -MPI Rank 0: Async gradient aggregation wait time: 0.006179 -MPI Rank 0: Actual gradient aggregation time: 0.04925 -MPI Rank 0: Async gradient aggregation wait time: 0.021881 -MPI Rank 0: Actual gradient aggregation time: 0.048935 -MPI Rank 0: 05/03/2016 14:27:17: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18355731 * 2560; EvalErrorPrediction = 0.58359375 * 2560; time = 0.5008s; samplesPerSecond = 5112.3 -MPI Rank 0: Async gradient aggregation wait time: 0.007415 -MPI Rank 0: Actual gradient aggregation time: 0.008339 -MPI Rank 0: Async gradient aggregation wait time: 0.008362 -MPI Rank 0: Actual gradient aggregation time: 0.035103 -MPI Rank 0: 05/03/2016 14:27:17: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10557335 * 2560; EvalErrorPrediction = 0.57812500 * 2560; time = 0.5184s; samplesPerSecond = 4938.1 -MPI Rank 0: Async gradient aggregation wait time: 0.010274 -MPI Rank 0: Actual gradient aggregation time: 0.033501 -MPI Rank 0: Async gradient aggregation wait time: 0.008356 -MPI Rank 0: Actual gradient aggregation time: 0.048396 -MPI Rank 0: 05/03/2016 14:27:18: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01391880 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.5023s; samplesPerSecond = 5096.9 -MPI Rank 0: Async gradient aggregation wait time: 0.008543 -MPI Rank 0: Actual gradient aggregation time: 0.037218 -MPI Rank 0: Async gradient aggregation wait time: 0.006692 -MPI Rank 0: Actual gradient aggregation time: 0.00961 -MPI Rank 0: 05/03/2016 14:27:18: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.05221236 * 2560; EvalErrorPrediction = 0.57148438 * 2560; time = 0.5001s; samplesPerSecond = 5119.3 -MPI Rank 0: Async gradient aggregation wait time: 0.008523 -MPI Rank 0: Actual gradient aggregation time: 0.048427 -MPI 
Rank 0: Async gradient aggregation wait time: 0.008338 -MPI Rank 0: Actual gradient aggregation time: 0.049478 -MPI Rank 0: 05/03/2016 14:27:19: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.11613999 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.5037s; samplesPerSecond = 5082.8 -MPI Rank 0: Async gradient aggregation wait time: 0.0062 -MPI Rank 0: Actual gradient aggregation time: 0.05051 -MPI Rank 0: Async gradient aggregation wait time: 0.009383 -MPI Rank 0: Actual gradient aggregation time: 0.023248 -MPI Rank 0: 05/03/2016 14:27:19: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06152980 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5108s; samplesPerSecond = 5011.8 -MPI Rank 0: Async gradient aggregation wait time: 0.009504 -MPI Rank 0: Actual gradient aggregation time: 0.012254 -MPI Rank 0: 05/03/2016 14:27:19: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.09738685 * 20480; EvalErrorPrediction = 0.57431641 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.06557s -MPI Rank 0: 05/03/2016 14:27:19: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.2' +MPI Rank 0: Actual gradient aggregation time: 0.046901 +MPI Rank 0: 05/03/2016 14:23:42: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.12914686 * 2304; EvalErrorPrediction = 0.57855903 * 2304; time = 0.4964s; samplesPerSecond = 4641.7 +MPI Rank 0: Async gradient aggregation wait time: 0.007906 +MPI Rank 0: Actual gradient aggregation time: 0.049241 +MPI Rank 0: Async gradient aggregation wait time: 0.020522 +MPI Rank 0: Actual gradient aggregation time: 0.046841 +MPI Rank 0: 05/03/2016 14:23:43: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11816271 * 2560; EvalErrorPrediction = 0.59179688 * 2560; time = 0.4804s; samplesPerSecond = 5328.7 +MPI Rank 0: Async gradient aggregation wait time: 0.006105 +MPI Rank 0: Actual gradient aggregation time: 0.047967 +MPI Rank 0: Async gradient aggregation wait time: 0.006039 +MPI Rank 0: Actual gradient aggregation time: 0.046026 +MPI Rank 0: 05/03/2016 14:23:43: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18355731 * 2560; EvalErrorPrediction = 0.58359375 * 2560; time = 0.4940s; samplesPerSecond = 5181.9 +MPI Rank 0: Async gradient aggregation wait time: 0.017919 +MPI Rank 0: Actual gradient aggregation time: 0.048794 +MPI Rank 0: Async gradient aggregation wait time: 0.021786 +MPI Rank 0: Actual gradient aggregation time: 0.04601 +MPI Rank 0: 05/03/2016 14:23:44: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10557335 * 2560; EvalErrorPrediction = 0.57812500 * 2560; time = 0.5035s; samplesPerSecond = 5084.2 +MPI Rank 0: Async gradient aggregation wait time: 0.021766 +MPI Rank 0: Actual gradient aggregation time: 0.046317 +MPI Rank 0: Async gradient aggregation wait time: 0.018789 +MPI Rank 0: Actual gradient aggregation time: 0.046049 +MPI Rank 0: 05/03/2016 14:23:44: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01391880 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.4964s; samplesPerSecond = 5156.7 +MPI Rank 0: Async gradient aggregation wait time: 0.02002 +MPI Rank 0: Actual gradient aggregation time: 0.045921 +MPI Rank 0: Async gradient aggregation wait time: 0.019259 +MPI 
Rank 0: Actual gradient aggregation time: 0.049466 +MPI Rank 0: 05/03/2016 14:23:44: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.05221236 * 2560; EvalErrorPrediction = 0.57148438 * 2560; time = 0.4927s; samplesPerSecond = 5195.3 +MPI Rank 0: Async gradient aggregation wait time: 0.020148 +MPI Rank 0: Actual gradient aggregation time: 0.044208 +MPI Rank 0: Async gradient aggregation wait time: 0.003231 +MPI Rank 0: Actual gradient aggregation time: 0.044953 +MPI Rank 0: 05/03/2016 14:23:45: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.11613999 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.4971s; samplesPerSecond = 5149.9 +MPI Rank 0: Async gradient aggregation wait time: 0.018719 +MPI Rank 0: Actual gradient aggregation time: 0.04831 +MPI Rank 0: Async gradient aggregation wait time: 0.021696 +MPI Rank 0: Actual gradient aggregation time: 0.04584 +MPI Rank 0: 05/03/2016 14:23:46: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06152980 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5121s; samplesPerSecond = 4998.9 +MPI Rank 0: Async gradient aggregation wait time: 0.009354 +MPI Rank 0: Actual gradient aggregation time: 0.013392 +MPI Rank 0: 05/03/2016 14:23:46: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.09738685 * 20480; EvalErrorPrediction = 0.57431641 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.00103s +MPI Rank 0: 05/03/2016 14:23:46: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.2' MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:23:46: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:19: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 05/03/2016 14:23:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
MPI Rank 0: Async gradient aggregation wait time: 1e-006 -MPI Rank 0: Actual gradient aggregation time: 0.110708 -MPI Rank 0: Async gradient aggregation wait time: 0.016903 -MPI Rank 0: Actual gradient aggregation time: 0.170246 -MPI Rank 0: 05/03/2016 14:27:21: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04847812 * 9216; EvalErrorPrediction = 0.54014757 * 9216; time = 1.5220s; samplesPerSecond = 6055.3 -MPI Rank 0: Async gradient aggregation wait time: 0.062195 -MPI Rank 0: Actual gradient aggregation time: 0.009103 -MPI Rank 0: Async gradient aggregation wait time: 0.047643 -MPI Rank 0: Actual gradient aggregation time: 0.107233 -MPI Rank 0: 05/03/2016 14:27:23: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94481165 * 10240; EvalErrorPrediction = 0.52617187 * 10240; time = 1.6712s; samplesPerSecond = 6127.2 -MPI Rank 0: 05/03/2016 14:27:23: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98968082 * 20480; EvalErrorPrediction = 0.53188477 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.22315s -MPI Rank 0: 05/03/2016 14:27:23: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.3' +MPI Rank 0: Actual gradient aggregation time: 0.060562 +MPI Rank 0: Async gradient aggregation wait time: 0.06692 +MPI Rank 0: Actual gradient aggregation time: 0.138673 +MPI Rank 0: 05/03/2016 14:23:47: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04847812 * 9216; EvalErrorPrediction = 0.54014757 * 9216; time = 1.5418s; samplesPerSecond = 5977.6 +MPI Rank 0: Async gradient aggregation wait time: 0.051111 +MPI Rank 0: Actual gradient aggregation time: 0.150091 +MPI Rank 0: Async gradient aggregation wait time: 0.042699 +MPI Rank 0: Actual gradient aggregation time: 0.147197 +MPI Rank 0: 05/03/2016 14:23:49: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94481165 * 10240; EvalErrorPrediction = 0.52617187 * 10240; time = 1.7161s; samplesPerSecond = 5967.2 +MPI Rank 0: 05/03/2016 14:23:49: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98968082 * 20480; EvalErrorPrediction = 0.53188477 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.28478s +MPI Rank 0: 05/03/2016 14:23:49: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.3' MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:23: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: 05/03/2016 14:23:49: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:23: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
-MPI Rank 0: Async gradient aggregation wait time: 0.043088 -MPI Rank 0: Actual gradient aggregation time: 0.009005 -MPI Rank 0: Async gradient aggregation wait time: 0.008973 -MPI Rank 0: Actual gradient aggregation time: 0.113495 -MPI Rank 0: 05/03/2016 14:27:24: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89065735 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 1.5946s; samplesPerSecond = 5779.5 -MPI Rank 0: Async gradient aggregation wait time: 0.00599 -MPI Rank 0: Actual gradient aggregation time: 0.149004 -MPI Rank 0: Async gradient aggregation wait time: 0.009067 -MPI Rank 0: Actual gradient aggregation time: 0.148684 -MPI Rank 0: 05/03/2016 14:27:26: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87529986 * 10240; EvalErrorPrediction = 0.51201172 * 10240; time = 1.4669s; samplesPerSecond = 6980.8 -MPI Rank 0: Async gradient aggregation wait time: 0.013827 -MPI Rank 0: 05/03/2016 14:27:26: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88285599 * 20480; EvalErrorPrediction = 0.51445312 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.1221s -MPI Rank 0: 05/03/2016 14:27:26: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: 05/03/2016 14:27:26: CNTKCommandTrainEnd: speechTrain +MPI Rank 0: 05/03/2016 14:23:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: Async gradient aggregation wait time: 0.043316 +MPI Rank 0: Actual gradient aggregation time: 0.151123 +MPI Rank 0: Async gradient aggregation wait time: 0.057245 +MPI Rank 0: Actual gradient aggregation time: 0.032763 +MPI Rank 0: 05/03/2016 14:23:50: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89065735 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 1.4886s; samplesPerSecond = 6190.9 +MPI Rank 0: Async gradient aggregation wait time: 0.008143 +MPI Rank 0: Actual gradient aggregation time: 0.204784 +MPI Rank 0: Async gradient aggregation wait time: 0.069445 +MPI Rank 0: Actual gradient aggregation time: 0.112673 +MPI Rank 0: 05/03/2016 14:23:52: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87529986 * 10240; EvalErrorPrediction = 0.51201172 * 10240; time = 1.5619s; samplesPerSecond = 6555.9 +MPI Rank 0: Async gradient aggregation wait time: 0.013466 +MPI Rank 0: 05/03/2016 14:23:52: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88285599 * 20480; EvalErrorPrediction = 0.51445312 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.10446s +MPI Rank 0: 05/03/2016 14:23:52: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn' +MPI Rank 0: 05/03/2016 14:23:52: CNTKCommandTrainEnd: speechTrain MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:26: Action "train" complete. +MPI Rank 0: 05/03/2016 14:23:52: Action "train" complete. 
MPI Rank 0: -MPI Rank 0: 05/03/2016 14:27:26: __COMPLETED__ -MPI Rank 1: 05/03/2016 14:26:59: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank1 -MPI Rank 1: 05/03/2016 14:26:59: ------------------------------------------------------------------- -MPI Rank 1: 05/03/2016 14:26:59: Build info: +MPI Rank 0: 05/03/2016 14:23:52: __COMPLETED__ +MPI Rank 1: 05/03/2016 14:23:25: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank1 +MPI Rank 1: 05/03/2016 14:23:25: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:23:25: Build info: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:59: Built time: May 3 2016 13:23:06 -MPI Rank 1: 05/03/2016 14:26:59: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 1: 05/03/2016 14:26:59: Build type: Release -MPI Rank 1: 05/03/2016 14:26:59: Build target: GPU -MPI Rank 1: 05/03/2016 14:26:59: With 1bit-SGD: no -MPI Rank 1: 05/03/2016 14:26:59: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 05/03/2016 14:26:59: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 1: 05/03/2016 14:26:59: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 05/03/2016 14:26:59: Build Branch: HEAD -MPI Rank 1: 05/03/2016 14:26:59: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 1: 05/03/2016 14:26:59: Built by svcphil on LIANA-09-w -MPI Rank 1: 05/03/2016 14:26:59: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 05/03/2016 14:26:59: ------------------------------------------------------------------- +MPI Rank 1: 05/03/2016 14:23:25: Built time: May 3 2016 13:23:06 +MPI Rank 1: 05/03/2016 14:23:25: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 1: 05/03/2016 14:23:25: Build type: Release +MPI Rank 1: 05/03/2016 14:23:25: Build target: GPU +MPI Rank 1: 05/03/2016 14:23:25: With 1bit-SGD: no +MPI Rank 1: 05/03/2016 14:23:25: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 1: 05/03/2016 14:23:25: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 1: 05/03/2016 14:23:25: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 1: 05/03/2016 14:23:25: Build Branch: HEAD +MPI Rank 1: 05/03/2016 14:23:25: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 1: 05/03/2016 14:23:25: Built by svcphil on LIANA-09-w +MPI Rank 1: 05/03/2016 14:23:25: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 1: 05/03/2016 14:23:25: ------------------------------------------------------------------- MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:59: Running on cntk-muc02 at 2016/05/03 14:26:59 -MPI Rank 1: 05/03/2016 14:26:59: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: 05/03/2016 14:23:25: Running on cntk-muc02 at 2016/05/03 14:23:25 +MPI Rank 1: 05/03/2016 14:23:25: Command line: +MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:59: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 14:26:59: precision = "float" +MPI Rank 1: 05/03/2016 14:23:25: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:23:25: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = $DeviceId$ MPI Rank 1: parallelTrain = true @@ -800,14 +806,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=1 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -815,18 +819,18 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:59: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:23:25: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:59: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 05/03/2016 14:26:59: precision = "float" +MPI Rank 1: 05/03/2016 14:23:25: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:23:25: precision = "float" MPI Rank 1: command = speechTrain MPI Rank 1: deviceId = 0 MPI Rank 1: parallelTrain = true MPI Rank 1: speechTrain = [ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -910,14 +914,12 @@ MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: ] MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: 
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: DeviceId=0 MPI Rank 1: timestamping=true -MPI Rank 1: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 1: speechTrain=[reader=[prefetch=true]] MPI Rank 1: numCPUThreads=1 MPI Rank 1: precision=double MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -925,24 +927,24 @@ MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:59: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:23:25: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:59: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 1: 05/03/2016 14:23:25: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 1: configparameters: cntk.cntk:command=speechTrain MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 1: configparameters: cntk.cntk:deviceId=0 MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=1 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 1: modelPath = 
"C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 1: deviceId = 0 MPI Rank 1: traceLevel = 1 MPI Rank 1: SimpleNetworkBuilder = [ @@ -1024,35 +1026,34 @@ MPI Rank 1: labelDim = 132 MPI Rank 1: labelType = "category" MPI Rank 1: ] MPI Rank 1: ] -MPI Rank 1: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 05/03/2016 14:26:59: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 05/03/2016 14:26:59: Commands: speechTrain -MPI Rank 1: 05/03/2016 14:26:59: Precision = "double" -MPI Rank 1: 05/03/2016 14:26:59: Using 1 CPU threads. -MPI Rank 1: 05/03/2016 14:26:59: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 05/03/2016 14:26:59: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 1: 05/03/2016 14:26:59: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 1: 05/03/2016 14:23:25: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 1: 05/03/2016 14:23:25: Commands: speechTrain +MPI Rank 1: 05/03/2016 14:23:25: Precision = "double" +MPI Rank 1: 05/03/2016 14:23:25: Using 1 CPU threads. 
+MPI Rank 1: 05/03/2016 14:23:25: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn +MPI Rank 1: 05/03/2016 14:23:25: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 1: 05/03/2016 14:23:25: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:59: ############################################################################## -MPI Rank 1: 05/03/2016 14:26:59: # # -MPI Rank 1: 05/03/2016 14:26:59: # Action "train" # -MPI Rank 1: 05/03/2016 14:26:59: # # -MPI Rank 1: 05/03/2016 14:26:59: ############################################################################## +MPI Rank 1: 05/03/2016 14:23:25: ############################################################################## +MPI Rank 1: 05/03/2016 14:23:25: # # +MPI Rank 1: 05/03/2016 14:23:25: # Action "train" # +MPI Rank 1: 05/03/2016 14:23:25: # # +MPI Rank 1: 05/03/2016 14:23:25: ############################################################################## MPI Rank 1: -MPI Rank 1: 05/03/2016 14:26:59: CNTKCommandTrainBegin: speechTrain +MPI Rank 1: 05/03/2016 14:23:25: CNTKCommandTrainBegin: speechTrain MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: Reading script file glob_0000.scp ... 948 entries -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 1: reading script file glob_0000.scp ... 948 entries MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 1: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 1: label set 0: 129 classes +MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:00: Creating virgin network. +MPI Rank 1: 05/03/2016 14:23:26: Creating virgin network. MPI Rank 1: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 1: MPI Rank 1: Post-processing network... @@ -1105,14 +1106,14 @@ MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 1: MPI Rank 1: Post-processing network complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:00: Created model with 25 nodes on GPU 0. +MPI Rank 1: 05/03/2016 14:23:26: Created model with 25 nodes on GPU 0. 
MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:00: Training criterion node(s): -MPI Rank 1: 05/03/2016 14:27:00: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 1: 05/03/2016 14:23:26: Training criterion node(s): +MPI Rank 1: 05/03/2016 14:23:26: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:00: Evaluation criterion node(s): +MPI Rank 1: 05/03/2016 14:23:26: Evaluation criterion node(s): MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:00: EvalErrorPrediction = ErrorPrediction +MPI Rank 1: 05/03/2016 14:23:26: EvalErrorPrediction = ErrorPrediction MPI Rank 1: MPI Rank 1: MPI Rank 1: Allocating matrices for forward and/or backward propagation. @@ -1120,189 +1121,195 @@ MPI Rank 1: MPI Rank 1: Memory Sharing Structure: MPI Rank 1: MPI Rank 1: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 1: 00000096B747F650: {[features Value[363 x *]] } -MPI Rank 1: 00000096D8C9D620: {[B2 Value[132 x 1]] } -MPI Rank 1: 00000096D8C9D800: {[W0 Value[512 x 363]] } -MPI Rank 1: 00000096D8C9DBC0: {[W2 Value[132 x 512]] } -MPI Rank 1: 00000096D8C9DE40: {[B0 Value[512 x 1]] } -MPI Rank 1: 00000096D8C9E340: {[B1 Value[512 x 1]] } -MPI Rank 1: 00000096D8C9E480: {[labels Value[132 x *]] } -MPI Rank 1: 00000096D8C9EC00: {[MeanOfFeatures Value[363]] } -MPI Rank 1: 00000096D8C9ECA0: {[InvStdOfFeatures Value[363]] } -MPI Rank 1: 00000096D8C9F240: {[W1 Value[512 x 512]] } -MPI Rank 1: 00000096D967C030: {[W0*features Value[512 x *]] } -MPI Rank 1: 00000096D967C3F0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 1: 00000096D967C530: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 1: 00000096D967C5D0: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 1: 00000096D967C670: {[EvalErrorPrediction Value[1]] } -MPI Rank 1: 00000096D967C8F0: {[LogOfPrior Value[132]] } -MPI Rank 1: 00000096D967C990: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 1: 00000096D967CA30: {[Prior Value[132]] } -MPI Rank 1: 00000096D967CAD0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 1: 00000096D967CCB0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 1: 00000096D967D110: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 1: 00000096D967D250: {[ScaledLogLikelihood Value[132 x 1 x *]] } -MPI Rank 1: 00000096D967D2F0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 1: 00000096D967D4D0: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 1: 00000096D967D610: {[B2 Gradient[132 x 1]] } -MPI Rank 1: 00000096D967D750: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 1: 00000096D967D7F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 1: 00000096D967DBB0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 000000619A22E2C0: {[W1 Value[512 x 512]] } +MPI Rank 1: 000000619A22E540: {[W2 Value[132 x 512]] } +MPI Rank 1: 000000619A22E900: {[InvStdOfFeatures Value[363]] } +MPI Rank 1: 000000619A22F440: {[MeanOfFeatures Value[363]] } +MPI Rank 1: 000000619A22F580: {[B1 Value[512 x 1]] } +MPI Rank 1: 000000619A22FA80: {[W0 Value[512 x 363]] } 
+MPI Rank 1: 000000619A22FC60: {[B0 Value[512 x 1]] } +MPI Rank 1: 000000619A230160: {[B2 Value[132 x 1]] } +MPI Rank 1: 000000619C957A40: {[LogOfPrior Value[132]] } +MPI Rank 1: 000000619C957AE0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 1: 000000619C957B80: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 1: 000000619C957C20: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 1: 000000619C957D60: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 1: 000000619C957E00: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 1: 000000619C957EA0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 1: 000000619C958260: {[B2 Gradient[132 x 1]] } +MPI Rank 1: 000000619C958440: {[labels Value[132 x *]] } +MPI Rank 1: 000000619C9584E0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 1: 000000619C958620: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 1: 000000619C958940: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 1: 000000619C9589E0: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 1: 000000619C958B20: {[Prior Value[132]] } +MPI Rank 1: 000000619C958BC0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 1: 000000619C958C60: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 1: 000000619C959200: {[EvalErrorPrediction Value[1]] } +MPI Rank 1: 000000619C9597A0: {[W0*features Value[512 x *]] } +MPI Rank 1: 000000619C959840: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 1: 00000061FB6B8350: {[features Value[363 x *]] } MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:00: Precomputing --> 3 PreCompute nodes found. +MPI Rank 1: 05/03/2016 14:23:26: Precomputing --> 3 PreCompute nodes found. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:00: MeanOfFeatures = Mean() -MPI Rank 1: 05/03/2016 14:27:00: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 05/03/2016 14:27:00: Prior = Mean() +MPI Rank 1: 05/03/2016 14:23:26: MeanOfFeatures = Mean() +MPI Rank 1: 05/03/2016 14:23:26: InvStdOfFeatures = InvStdDev() +MPI Rank 1: 05/03/2016 14:23:26: Prior = Mean() +MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:05: Precomputing --> Completed. +MPI Rank 1: 05/03/2016 14:23:31: Precomputing --> Completed. MPI Rank 1: MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:05: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: 05/03/2016 14:23:31: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:05: Starting minibatch loop. 
-MPI Rank 1: 05/03/2016 14:27:05: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.4673s; samplesPerSecond = 1369.6 -MPI Rank 1: 05/03/2016 14:27:06: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3238s; samplesPerSecond = 1976.5 -MPI Rank 1: 05/03/2016 14:27:06: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3136s; samplesPerSecond = 2040.8 -MPI Rank 1: 05/03/2016 14:27:06: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3136s; samplesPerSecond = 2041.1 -MPI Rank 1: 05/03/2016 14:27:07: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3133s; samplesPerSecond = 2042.5 -MPI Rank 1: 05/03/2016 14:27:07: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3135s; samplesPerSecond = 2041.7 -MPI Rank 1: 05/03/2016 14:27:07: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3132s; samplesPerSecond = 2043.2 -MPI Rank 1: 05/03/2016 14:27:08: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3129s; samplesPerSecond = 2045.2 -MPI Rank 1: 05/03/2016 14:27:08: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3126s; samplesPerSecond = 2047.1 -MPI Rank 1: 05/03/2016 14:27:08: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3130s; samplesPerSecond = 2044.5 -MPI Rank 1: 05/03/2016 14:27:09: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3133s; samplesPerSecond = 2043.1 -MPI Rank 1: 05/03/2016 14:27:09: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3126s; samplesPerSecond = 2047.4 -MPI Rank 1: 05/03/2016 14:27:09: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3133s; samplesPerSecond = 2042.5 -MPI Rank 1: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3138s; samplesPerSecond = 2039.3 -MPI Rank 1: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3142s; samplesPerSecond = 2037.1 -MPI Rank 1: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3129s; samplesPerSecond = 2045.3 -MPI Rank 1: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3138s; samplesPerSecond = 2039.6 -MPI Rank 1: 05/03/2016 14:27:11: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3134s; samplesPerSecond = 2042.1 -MPI Rank 1: 05/03/2016 14:27:11: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3232s; samplesPerSecond = 1979.9 -MPI Rank 1: 05/03/2016 14:27:11: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3132s; samplesPerSecond = 2043.2 -MPI Rank 1: 05/03/2016 14:27:12: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3345s; samplesPerSecond = 1913.1 -MPI Rank 1: 05/03/2016 14:27:12: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3135s; samplesPerSecond = 2041.7 -MPI Rank 1: 05/03/2016 14:27:12: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3131s; samplesPerSecond = 2044.2 -MPI Rank 1: 05/03/2016 14:27:13: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3132s; samplesPerSecond = 2043.3 -MPI Rank 1: 05/03/2016 14:27:13: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3131s; samplesPerSecond = 2044.1 -MPI Rank 1: 05/03/2016 14:27:13: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3129s; samplesPerSecond = 2045.3 -MPI Rank 1: 05/03/2016 14:27:14: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3138s; samplesPerSecond = 2039.3 -MPI Rank 1: 05/03/2016 14:27:14: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3112s; samplesPerSecond = 2056.7 -MPI Rank 1: 05/03/2016 14:27:14: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3167s; samplesPerSecond = 2021.1 -MPI Rank 1: 05/03/2016 14:27:15: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3132s; samplesPerSecond = 2043.2 -MPI Rank 1: 05/03/2016 14:27:15: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3126s; samplesPerSecond = 2047.2 -MPI Rank 1: 05/03/2016 14:27:15: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.2616s; samplesPerSecond = 2446.1 -MPI Rank 1: 05/03/2016 14:27:15: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.255s +MPI Rank 1: 05/03/2016 14:23:31: Starting minibatch loop. 
+MPI Rank 1: 05/03/2016 14:23:32: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3074s; samplesPerSecond = 2082.0 +MPI Rank 1: 05/03/2016 14:23:32: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3128s; samplesPerSecond = 2045.9 +MPI Rank 1: 05/03/2016 14:23:32: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3135s; samplesPerSecond = 2041.7 +MPI Rank 1: 05/03/2016 14:23:33: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3139s; samplesPerSecond = 2039.0 +MPI Rank 1: 05/03/2016 14:23:33: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3136s; samplesPerSecond = 2041.0 +MPI Rank 1: 05/03/2016 14:23:33: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3122s; samplesPerSecond = 2049.6 +MPI Rank 1: 05/03/2016 14:23:34: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3141s; samplesPerSecond = 2037.6 +MPI Rank 1: 05/03/2016 14:23:34: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3142s; samplesPerSecond = 2036.7 +MPI Rank 1: 05/03/2016 14:23:34: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3544s; samplesPerSecond = 1806.0 +MPI Rank 1: 05/03/2016 14:23:35: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3029s; samplesPerSecond = 2112.7 +MPI Rank 1: 05/03/2016 14:23:35: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3132s; samplesPerSecond = 2043.3 +MPI Rank 1: 05/03/2016 14:23:35: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3123s; samplesPerSecond = 2049.2 +MPI Rank 1: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.2923s; samplesPerSecond = 2189.6 +MPI Rank 1: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3332s; samplesPerSecond = 1920.8 +MPI Rank 1: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3017s; samplesPerSecond = 2121.3 +MPI Rank 1: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3132s; samplesPerSecond = 2043.6 +MPI Rank 1: 05/03/2016 14:23:37: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3140s; samplesPerSecond = 2037.9 +MPI Rank 1: 05/03/2016 14:23:37: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3132s; samplesPerSecond = 2043.2 +MPI Rank 1: 05/03/2016 14:23:37: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3129s; samplesPerSecond = 2045.1 +MPI Rank 1: 05/03/2016 14:23:38: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3123s; samplesPerSecond = 2049.4 +MPI Rank 1: 05/03/2016 14:23:38: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3127s; samplesPerSecond = 2046.8 +MPI Rank 1: 05/03/2016 14:23:38: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3136s; samplesPerSecond = 2040.8 +MPI Rank 1: 05/03/2016 14:23:39: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3133s; samplesPerSecond = 2043.0 +MPI Rank 1: 05/03/2016 14:23:39: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3129s; samplesPerSecond = 2045.1 +MPI Rank 1: 05/03/2016 14:23:39: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3132s; samplesPerSecond = 2043.6 +MPI Rank 1: 05/03/2016 14:23:40: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3131s; samplesPerSecond = 2044.0 +MPI Rank 1: 05/03/2016 14:23:40: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3129s; samplesPerSecond = 2045.4 +MPI Rank 1: 05/03/2016 14:23:40: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3126s; samplesPerSecond = 2047.6 +MPI Rank 1: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.2931s; samplesPerSecond = 2183.8 +MPI Rank 1: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3232s; samplesPerSecond = 1980.3 +MPI Rank 1: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3134s; samplesPerSecond = 2042.1 +MPI Rank 1: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3126s; samplesPerSecond = 2047.2 +MPI Rank 1: 05/03/2016 14:23:41: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.0363s MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:15: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 1: 05/03/2016 14:23:42: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:15: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.040006 -MPI Rank 1: Async gradient aggregation wait time: 0.009557 -MPI Rank 1: Actual gradient aggregation time: 0.058902 -MPI Rank 1: 05/03/2016 14:27:16: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.12914686 * 2304; EvalErrorPrediction = 0.57855903 * 2304; time = 0.4840s; samplesPerSecond = 4760.3 -MPI Rank 1: Async gradient aggregation wait time: 0.028787 -MPI Rank 1: Actual gradient aggregation time: 0.076085 -MPI Rank 1: Async gradient aggregation wait time: 0.006869 -MPI Rank 1: Actual gradient aggregation time: 0.025467 -MPI Rank 1: 05/03/2016 14:27:16: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11816271 * 2560; EvalErrorPrediction = 0.59179688 * 2560; time = 0.5216s; samplesPerSecond = 4908.1 -MPI Rank 1: Async gradient aggregation wait time: 0.006869 -MPI Rank 1: Actual gradient aggregation time: 0.017827 -MPI Rank 1: Async gradient aggregation wait time: 0.008503 -MPI Rank 1: Actual gradient aggregation time: 0.009653 -MPI Rank 1: 05/03/2016 14:27:17: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18355731 * 2560; EvalErrorPrediction = 0.58359375 * 2560; time = 0.5047s; samplesPerSecond = 5072.1 -MPI Rank 1: Async gradient aggregation wait time: 0.008282 -MPI Rank 1: Actual gradient aggregation time: 0.039126 -MPI Rank 1: Async gradient aggregation wait time: 0.01136 -MPI Rank 1: Actual gradient aggregation time: 0.047787 -MPI Rank 1: 05/03/2016 14:27:17: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10557335 * 2560; EvalErrorPrediction = 0.57812500 * 2560; time = 0.5162s; samplesPerSecond = 4959.2 -MPI Rank 1: Async gradient aggregation wait time: 0.008415 -MPI Rank 1: Actual gradient aggregation time: 0.009719 -MPI Rank 1: Async gradient aggregation wait time: 0.009227 -MPI Rank 1: Actual gradient aggregation time: 0.009127 -MPI Rank 1: 05/03/2016 14:27:18: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01391880 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.5023s; samplesPerSecond = 5097.0 -MPI Rank 1: Async gradient aggregation wait time: 0.009298 -MPI Rank 1: Actual gradient aggregation time: 0.010197 -MPI Rank 1: Async gradient aggregation wait time: 0.006045 -MPI Rank 1: Actual gradient aggregation time: 0.010335 -MPI Rank 1: 05/03/2016 14:27:18: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.05221236 * 2560; EvalErrorPrediction = 0.57148438 * 2560; time = 0.4999s; samplesPerSecond = 5121.3 -MPI Rank 1: Async gradient aggregation wait time: 0.008541 -MPI Rank 1: Actual gradient aggregation time: 0.035747 -MPI Rank 1: Async gradient aggregation wait time: 0.00916 -MPI Rank 1: Actual gradient aggregation time: 0.029044 -MPI Rank 1: 05/03/2016 14:27:19: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.11613999 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.5039s; samplesPerSecond = 5080.8 -MPI Rank 1: Async gradient aggregation wait time: 0.006971 -MPI Rank 1: Actual gradient aggregation time: 0.020013 -MPI Rank 1: Async gradient aggregation wait time: 0.011032 -MPI Rank 
1: Actual gradient aggregation time: 0.024034 -MPI Rank 1: 05/03/2016 14:27:19: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06152980 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5101s; samplesPerSecond = 5018.6 -MPI Rank 1: Async gradient aggregation wait time: 0.01 -MPI Rank 1: Actual gradient aggregation time: 0.011791 -MPI Rank 1: 05/03/2016 14:27:19: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.09738685 * 20480; EvalErrorPrediction = 0.57431641 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.0739s +MPI Rank 1: 05/03/2016 14:23:42: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: Actual gradient aggregation time: 0.029856 +MPI Rank 1: Async gradient aggregation wait time: 0.018468 +MPI Rank 1: Actual gradient aggregation time: 0.040457 +MPI Rank 1: 05/03/2016 14:23:42: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.12914686 * 2304; EvalErrorPrediction = 0.57855903 * 2304; time = 0.4917s; samplesPerSecond = 4685.5 +MPI Rank 1: Async gradient aggregation wait time: 0.023229 +MPI Rank 1: Actual gradient aggregation time: 0.048624 +MPI Rank 1: Async gradient aggregation wait time: 0.007096 +MPI Rank 1: Actual gradient aggregation time: 0.047263 +MPI Rank 1: 05/03/2016 14:23:43: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11816271 * 2560; EvalErrorPrediction = 0.59179688 * 2560; time = 0.4875s; samplesPerSecond = 5251.6 +MPI Rank 1: Async gradient aggregation wait time: 0.006759 +MPI Rank 1: Actual gradient aggregation time: 0.016593 +MPI Rank 1: Async gradient aggregation wait time: 0.025889 +MPI Rank 1: Actual gradient aggregation time: 0.046405 +MPI Rank 1: 05/03/2016 14:23:43: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18355731 * 2560; EvalErrorPrediction = 0.58359375 * 2560; time = 0.4941s; samplesPerSecond = 5180.7 +MPI Rank 1: Async gradient aggregation wait time: 0.008492 +MPI Rank 1: Actual gradient aggregation time: 0.04827 +MPI Rank 1: Async gradient aggregation wait time: 0.010224 +MPI Rank 1: Actual gradient aggregation time: 0.046228 +MPI Rank 1: 05/03/2016 14:23:44: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10557335 * 2560; EvalErrorPrediction = 0.57812500 * 2560; time = 0.5036s; samplesPerSecond = 5083.8 +MPI Rank 1: Async gradient aggregation wait time: 0.00867 +MPI Rank 1: Actual gradient aggregation time: 0.045611 +MPI Rank 1: Async gradient aggregation wait time: 0.008816 +MPI Rank 1: Actual gradient aggregation time: 0.046453 +MPI Rank 1: 05/03/2016 14:23:44: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01391880 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.4964s; samplesPerSecond = 5156.8 +MPI Rank 1: Async gradient aggregation wait time: 0.008117 +MPI Rank 1: Actual gradient aggregation time: 0.013697 +MPI Rank 1: Async gradient aggregation wait time: 0.006658 +MPI Rank 1: Actual gradient aggregation time: 0.0462 +MPI Rank 1: 05/03/2016 14:23:44: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.05221236 * 2560; EvalErrorPrediction = 0.57148438 * 2560; time = 0.4928s; samplesPerSecond = 5194.7 +MPI Rank 1: Async gradient aggregation wait time: 0.010357 +MPI Rank 1: Actual gradient aggregation time: 0.044522 +MPI Rank 1: Async gradient aggregation wait time: 0.018601 +MPI 
Rank 1: Actual gradient aggregation time: 0.049578 +MPI Rank 1: 05/03/2016 14:23:45: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.11613999 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.4973s; samplesPerSecond = 5148.0 +MPI Rank 1: Async gradient aggregation wait time: 0.008843 +MPI Rank 1: Actual gradient aggregation time: 0.048819 +MPI Rank 1: Async gradient aggregation wait time: 0.010676 +MPI Rank 1: Actual gradient aggregation time: 0.045988 +MPI Rank 1: 05/03/2016 14:23:46: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06152980 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5100s; samplesPerSecond = 5019.6 +MPI Rank 1: Async gradient aggregation wait time: 0.009308 +MPI Rank 1: Actual gradient aggregation time: 0.013104 +MPI Rank 1: 05/03/2016 14:23:46: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.09738685 * 20480; EvalErrorPrediction = 0.57431641 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.0017s MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 14:23:46: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:19: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.013217 -MPI Rank 1: Actual gradient aggregation time: 0.114118 -MPI Rank 1: Async gradient aggregation wait time: 0.016065 -MPI Rank 1: Actual gradient aggregation time: 0.172356 -MPI Rank 1: 05/03/2016 14:27:21: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04847812 * 9216; EvalErrorPrediction = 0.54014757 * 9216; time = 1.5237s; samplesPerSecond = 6048.5 -MPI Rank 1: Async gradient aggregation wait time: 0.007127 -MPI Rank 1: Actual gradient aggregation time: 0.009934 -MPI Rank 1: Async gradient aggregation wait time: 0.172296 -MPI Rank 1: Actual gradient aggregation time: 0.115345 -MPI Rank 1: 05/03/2016 14:27:23: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94481165 * 10240; EvalErrorPrediction = 0.52617187 * 10240; time = 1.6702s; samplesPerSecond = 6130.8 -MPI Rank 1: 05/03/2016 14:27:23: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98968082 * 20480; EvalErrorPrediction = 0.53188477 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.22404s +MPI Rank 1: 05/03/2016 14:23:46: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 1: Async gradient aggregation wait time: 0.003155 +MPI Rank 1: Actual gradient aggregation time: 0.184961 +MPI Rank 1: Async gradient aggregation wait time: 0.01729 +MPI Rank 1: Actual gradient aggregation time: 0.134899 +MPI Rank 1: 05/03/2016 14:23:47: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04847812 * 9216; EvalErrorPrediction = 0.54014757 * 9216; time = 1.5424s; samplesPerSecond = 5974.9 +MPI Rank 1: Async gradient aggregation wait time: 0.000428 +MPI Rank 1: Actual gradient aggregation time: 0.152197 +MPI Rank 1: Async gradient aggregation wait time: 0.045728 +MPI Rank 1: Actual gradient aggregation time: 0.147409 +MPI Rank 1: 05/03/2016 14:23:49: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94481165 * 10240; EvalErrorPrediction = 0.52617187 * 10240; time = 1.7164s; samplesPerSecond = 5966.1 +MPI Rank 1: 05/03/2016 14:23:49: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98968082 * 20480; EvalErrorPrediction = 0.53188477 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.28687s MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:23: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: 05/03/2016 14:23:49: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:23: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.04232 -MPI Rank 1: Actual gradient aggregation time: 0.097652 -MPI Rank 1: Async gradient aggregation wait time: 0.009803 -MPI Rank 1: Actual gradient aggregation time: 0.037889 -MPI Rank 1: 05/03/2016 14:27:24: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89065735 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 1.5918s; samplesPerSecond = 5789.8 -MPI Rank 1: Async gradient aggregation wait time: 0.00672 -MPI Rank 1: Actual gradient aggregation time: 0.009064 -MPI Rank 1: Async gradient aggregation wait time: 0.009881 -MPI Rank 1: Actual gradient aggregation time: 0.145762 -MPI Rank 1: 05/03/2016 14:27:26: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87529986 * 10240; EvalErrorPrediction = 0.51201172 * 10240; time = 1.4703s; samplesPerSecond = 6964.6 -MPI Rank 1: Async gradient aggregation wait time: 0.01362 -MPI Rank 1: 05/03/2016 14:27:26: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88285599 * 20480; EvalErrorPrediction = 0.51445312 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.12271s -MPI Rank 1: 05/03/2016 14:27:26: CNTKCommandTrainEnd: speechTrain +MPI Rank 1: 05/03/2016 14:23:49: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 1: Async gradient aggregation wait time: 0.042599 +MPI Rank 1: Actual gradient aggregation time: 0.008818 +MPI Rank 1: Async gradient aggregation wait time: 0.008841 +MPI Rank 1: Actual gradient aggregation time: 0.039603 +MPI Rank 1: 05/03/2016 14:23:50: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89065735 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 1.4884s; samplesPerSecond = 6192.0 +MPI Rank 1: Async gradient aggregation wait time: 0.011112 +MPI Rank 1: Actual gradient aggregation time: 0.120284 +MPI Rank 1: Async gradient aggregation wait time: 0.028091 +MPI Rank 1: Actual gradient aggregation time: 0.113305 +MPI Rank 1: 05/03/2016 14:23:52: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87529986 * 10240; EvalErrorPrediction = 0.51201172 * 10240; time = 1.5619s; samplesPerSecond = 6555.9 +MPI Rank 1: Async gradient aggregation wait time: 0.013569 +MPI Rank 1: 05/03/2016 14:23:52: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88285599 * 20480; EvalErrorPrediction = 0.51445312 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.10476s +MPI Rank 1: 05/03/2016 14:23:52: CNTKCommandTrainEnd: speechTrain MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:26: Action "train" complete. +MPI Rank 1: 05/03/2016 14:23:52: Action "train" complete. MPI Rank 1: -MPI Rank 1: 05/03/2016 14:27:26: __COMPLETED__ -MPI Rank 2: 05/03/2016 14:27:00: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank2 -MPI Rank 2: 05/03/2016 14:27:00: ------------------------------------------------------------------- -MPI Rank 2: 05/03/2016 14:27:00: Build info: +MPI Rank 1: 05/03/2016 14:23:52: __COMPLETED__ +MPI Rank 2: 05/03/2016 14:23:26: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank2 +MPI Rank 2: 05/03/2016 14:23:26: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:23:26: Build info: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:00: Built time: May 3 2016 13:23:06 -MPI Rank 2: 05/03/2016 14:27:00: Last modified date: Mon Apr 18 00:00:12 2016 -MPI Rank 2: 05/03/2016 14:27:00: Build type: Release -MPI Rank 2: 05/03/2016 14:27:00: Build target: GPU -MPI Rank 2: 05/03/2016 14:27:00: With 1bit-SGD: no -MPI Rank 2: 05/03/2016 14:27:00: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 05/03/2016 14:27:00: CUB_PATH: C:\src\cub-1.4.1 -MPI Rank 2: 05/03/2016 14:27:00: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 05/03/2016 14:27:00: Build Branch: HEAD -MPI Rank 2: 05/03/2016 14:27:00: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -MPI Rank 2: 05/03/2016 14:27:00: Built by svcphil on LIANA-09-w -MPI Rank 2: 05/03/2016 14:27:00: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 05/03/2016 14:27:00: ------------------------------------------------------------------- +MPI Rank 2: 05/03/2016 14:23:26: Built time: May 3 2016 13:23:06 +MPI Rank 2: 05/03/2016 14:23:26: Last modified date: Mon Apr 18 00:00:12 2016 +MPI Rank 2: 05/03/2016 14:23:26: Build type: Release +MPI Rank 2: 05/03/2016 14:23:26: Build target: GPU +MPI Rank 2: 05/03/2016 14:23:26: With 1bit-SGD: no +MPI Rank 2: 
05/03/2016 14:23:26: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +MPI Rank 2: 05/03/2016 14:23:26: CUB_PATH: C:\src\cub-1.4.1 +MPI Rank 2: 05/03/2016 14:23:26: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +MPI Rank 2: 05/03/2016 14:23:26: Build Branch: HEAD +MPI Rank 2: 05/03/2016 14:23:26: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +MPI Rank 2: 05/03/2016 14:23:26: Built by svcphil on LIANA-09-w +MPI Rank 2: 05/03/2016 14:23:26: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +MPI Rank 2: 05/03/2016 14:23:26: ------------------------------------------------------------------- MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:00: Running on cntk-muc02 at 2016/05/03 14:27:00 -MPI Rank 2: 05/03/2016 14:27:00: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: 05/03/2016 14:23:26: Running on cntk-muc02 at 2016/05/03 14:23:26 +MPI Rank 2: 05/03/2016 14:23:26: Command line: +MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] 
stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:00: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 14:27:00: precision = "float" +MPI Rank 2: 05/03/2016 14:23:26: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:23:26: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = $DeviceId$ MPI Rank 2: parallelTrain = true @@ -1392,14 +1399,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=1 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -1407,18 +1412,18 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:00: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:23:26: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:00: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 05/03/2016 14:27:00: precision = "float" +MPI Rank 2: 05/03/2016 14:23:26: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:23:26: precision = "float" MPI Rank 2: command = speechTrain MPI Rank 2: deviceId = 0 MPI Rank 2: parallelTrain = true MPI Rank 2: speechTrain = [ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = 
"C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1502,14 +1507,12 @@ MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: ] MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: DeviceId=0 MPI Rank 2: timestamping=true -MPI Rank 2: speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -MPI Rank 2: speechTrain=[reader=[prefetch=true]] MPI Rank 2: numCPUThreads=1 MPI Rank 2: precision=double MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] @@ -1517,24 +1520,24 @@ MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGr MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:00: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:23:26: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:00: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +MPI Rank 2: 05/03/2016 14:23:26: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> MPI Rank 2: configparameters: cntk.cntk:command=speechTrain MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data MPI Rank 2: configparameters: cntk.cntk:deviceId=0 MPI Rank 2: configparameters: 
cntk.cntk:numCPUThreads=1 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu +MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" +MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" MPI Rank 2: deviceId = 0 MPI Rank 2: traceLevel = 1 MPI Rank 2: SimpleNetworkBuilder = [ @@ -1616,35 +1619,34 @@ MPI Rank 2: labelDim = 132 MPI Rank 2: labelType = "category" MPI Rank 2: ] MPI Rank 2: ] -MPI Rank 2: ] [reader=[readerType=ExperimentalHTKMLFReader]] [reader=[prefetch=true]] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] +MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr +MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 05/03/2016 14:27:00: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 05/03/2016 14:27:00: Commands: speechTrain -MPI Rank 2: 05/03/2016 14:27:00: Precision = "double" -MPI Rank 2: 05/03/2016 14:27:00: Using 1 CPU threads. 
-MPI Rank 2: 05/03/2016 14:27:00: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 05/03/2016 14:27:00: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 2: 05/03/2016 14:27:00: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 +MPI Rank 2: 05/03/2016 14:23:26: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +MPI Rank 2: 05/03/2016 14:23:26: Commands: speechTrain +MPI Rank 2: 05/03/2016 14:23:26: Precision = "double" +MPI Rank 2: 05/03/2016 14:23:26: Using 1 CPU threads. +MPI Rank 2: 05/03/2016 14:23:26: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn +MPI Rank 2: 05/03/2016 14:23:26: CNTKCommandTrainInfo: speechTrain : 4 +MPI Rank 2: 05/03/2016 14:23:26: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:00: ############################################################################## -MPI Rank 2: 05/03/2016 14:27:00: # # -MPI Rank 2: 05/03/2016 14:27:00: # Action "train" # -MPI Rank 2: 05/03/2016 14:27:00: # # -MPI Rank 2: 05/03/2016 14:27:00: ############################################################################## +MPI Rank 2: 05/03/2016 14:23:26: ############################################################################## +MPI Rank 2: 05/03/2016 14:23:26: # # +MPI Rank 2: 05/03/2016 14:23:26: # Action "train" # +MPI Rank 2: 05/03/2016 14:23:26: # # +MPI Rank 2: 05/03/2016 14:23:26: ############################################################################## MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:00: CNTKCommandTrainBegin: speechTrain +MPI Rank 2: 05/03/2016 14:23:26: CNTKCommandTrainBegin: speechTrain MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: Reading script file glob_0000.scp ... 948 entries -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +MPI Rank 2: reading script file glob_0000.scp ... 948 entries MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MPI Rank 2: MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +MPI Rank 2: label set 0: 129 classes +MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:00: Creating virgin network. +MPI Rank 2: 05/03/2016 14:23:26: Creating virgin network. MPI Rank 2: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 MPI Rank 2: MPI Rank 2: Post-processing network... 
@@ -1697,14 +1699,14 @@ MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input MPI Rank 2: MPI Rank 2: Post-processing network complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:01: Created model with 25 nodes on GPU 0. +MPI Rank 2: 05/03/2016 14:23:27: Created model with 25 nodes on GPU 0. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:01: Training criterion node(s): -MPI Rank 2: 05/03/2016 14:27:01: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +MPI Rank 2: 05/03/2016 14:23:27: Training criterion node(s): +MPI Rank 2: 05/03/2016 14:23:27: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:01: Evaluation criterion node(s): +MPI Rank 2: 05/03/2016 14:23:27: Evaluation criterion node(s): MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:01: EvalErrorPrediction = ErrorPrediction +MPI Rank 2: 05/03/2016 14:23:27: EvalErrorPrediction = ErrorPrediction MPI Rank 2: MPI Rank 2: MPI Rank 2: Allocating matrices for forward and/or backward propagation. @@ -1712,160 +1714,166 @@ MPI Rank 2: MPI Rank 2: Memory Sharing Structure: MPI Rank 2: MPI Rank 2: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -MPI Rank 2: 00000092A3A534B0: {[features Value[363 x *]] } -MPI Rank 2: 00000092C572EB80: {[labels Value[132 x *]] } -MPI Rank 2: 00000092C572EFE0: {[InvStdOfFeatures Value[363]] } -MPI Rank 2: 00000092C572F440: {[W2 Value[132 x 512]] } -MPI Rank 2: 00000092C572F4E0: {[B2 Value[132 x 1]] } -MPI Rank 2: 00000092C572F620: {[MeanOfFeatures Value[363]] } -MPI Rank 2: 00000092C572FB20: {[W0 Value[512 x 363]] } -MPI Rank 2: 00000092C572FE40: {[B0 Value[512 x 1]] } -MPI Rank 2: 00000092C572FF80: {[W1 Value[512 x 512]] } -MPI Rank 2: 00000092C57305C0: {[B1 Value[512 x 1]] } -MPI Rank 2: 00000092C5C2DF30: {[EvalErrorPrediction Value[1]] } -MPI Rank 2: 00000092C5C2DFD0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -MPI Rank 2: 00000092C5C2E070: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -MPI Rank 2: 00000092C5C2E1B0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -MPI Rank 2: 00000092C5C2E250: {[W2*H1 Gradient[132 x 1 x *]] } -MPI Rank 2: 00000092C5C2E570: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -MPI Rank 2: 00000092C5C2E610: {[CrossEntropyWithSoftmax Gradient[1]] } -MPI Rank 2: 00000092C5C2E750: {[B2 Gradient[132 x 1]] } -MPI Rank 2: 00000092C5C2E7F0: {[W0*features Value[512 x *]] } -MPI Rank 2: 00000092C5C2E930: {[LogOfPrior Value[132]] } -MPI Rank 2: 00000092C5C2E9D0: {[MVNormalizedFeatures Value[363 x *]] } -MPI Rank 2: 00000092C5C2EA70: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -MPI Rank 2: 00000092C5C2EBB0: {[CrossEntropyWithSoftmax Value[1]] } -MPI Rank 2: 00000092C5C2EC50: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -MPI Rank 2: 00000092C5C2F330: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -MPI Rank 2: 00000092C5C2F510: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -MPI Rank 2: 00000092C5C2F6F0: {[Prior Value[132]] } -MPI Rank 2: 00000092C5C2F8D0: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 
000000FD9FDB7050: {[B1 Value[512 x 1]] } +MPI Rank 2: 000000FD9FDB7190: {[B0 Value[512 x 1]] } +MPI Rank 2: 000000FD9FDB7550: {[W2 Value[132 x 512]] } +MPI Rank 2: 000000FD9FDB7870: {[InvStdOfFeatures Value[363]] } +MPI Rank 2: 000000FD9FDB7910: {[W1 Value[512 x 512]] } +MPI Rank 2: 000000FD9FDB7AF0: {[B2 Value[132 x 1]] } +MPI Rank 2: 000000FD9FDB81D0: {[MeanOfFeatures Value[363]] } +MPI Rank 2: 000000FD9FDB8630: {[W0 Value[512 x 363]] } +MPI Rank 2: 000000FDA047FCB0: {[CrossEntropyWithSoftmax Gradient[1]] } +MPI Rank 2: 000000FDA047FFD0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +MPI Rank 2: 000000FDA0480110: {[MVNormalizedFeatures Value[363 x *]] } +MPI Rank 2: 000000FDA04801B0: {[W2*H1 Gradient[132 x 1 x *]] } +MPI Rank 2: 000000FDA0480390: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +MPI Rank 2: 000000FDA0480570: {[Prior Value[132]] } +MPI Rank 2: 000000FDA0480610: {[EvalErrorPrediction Value[1]] } +MPI Rank 2: 000000FDA04806B0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +MPI Rank 2: 000000FDA0480750: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +MPI Rank 2: 000000FDA04807F0: {[labels Value[132 x *]] } +MPI Rank 2: 000000FDA0480890: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +MPI Rank 2: 000000FDA0480A70: {[ScaledLogLikelihood Value[132 x 1 x *]] } +MPI Rank 2: 000000FDA0480C50: {[LogOfPrior Value[132]] } +MPI Rank 2: 000000FDA0480E30: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +MPI Rank 2: 000000FDA0480F70: {[CrossEntropyWithSoftmax Value[1]] } +MPI Rank 2: 000000FDA0481010: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +MPI Rank 2: 000000FDA0481150: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +MPI Rank 2: 000000FDA0481790: {[B2 Gradient[132 x 1]] } +MPI Rank 2: 000000FDA0481970: {[W0*features Value[512 x *]] } +MPI Rank 2: 000000FDFEE4C320: {[features Value[363 x *]] } MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:01: Precomputing --> 3 PreCompute nodes found. +MPI Rank 2: 05/03/2016 14:23:27: Precomputing --> 3 PreCompute nodes found. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:01: MeanOfFeatures = Mean() -MPI Rank 2: 05/03/2016 14:27:01: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 05/03/2016 14:27:01: Prior = Mean() +MPI Rank 2: 05/03/2016 14:23:27: MeanOfFeatures = Mean() +MPI Rank 2: 05/03/2016 14:23:27: InvStdOfFeatures = InvStdDev() +MPI Rank 2: 05/03/2016 14:23:27: Prior = Mean() +MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:05: Precomputing --> Completed. +MPI Rank 2: 05/03/2016 14:23:31: Precomputing --> Completed. MPI Rank 2: MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:05: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: 05/03/2016 14:23:31: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:05: Starting minibatch loop. 
-MPI Rank 2: 05/03/2016 14:27:05: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.4466s; samplesPerSecond = 1432.9 -MPI Rank 2: 05/03/2016 14:27:06: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.2927s; samplesPerSecond = 2186.8 -MPI Rank 2: 05/03/2016 14:27:06: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3024s; samplesPerSecond = 2116.4 -MPI Rank 2: 05/03/2016 14:27:06: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3143s; samplesPerSecond = 2036.6 -MPI Rank 2: 05/03/2016 14:27:07: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3135s; samplesPerSecond = 2041.8 -MPI Rank 2: 05/03/2016 14:27:07: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3134s; samplesPerSecond = 2042.2 -MPI Rank 2: 05/03/2016 14:27:07: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3136s; samplesPerSecond = 2040.8 -MPI Rank 2: 05/03/2016 14:27:08: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3128s; samplesPerSecond = 2045.8 -MPI Rank 2: 05/03/2016 14:27:08: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3127s; samplesPerSecond = 2046.5 -MPI Rank 2: 05/03/2016 14:27:08: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3130s; samplesPerSecond = 2044.7 -MPI Rank 2: 05/03/2016 14:27:09: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3129s; samplesPerSecond = 2045.6 -MPI Rank 2: 05/03/2016 14:27:09: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3123s; samplesPerSecond = 2049.4 -MPI Rank 2: 05/03/2016 14:27:09: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3131s; samplesPerSecond = 2044.0 -MPI Rank 2: 05/03/2016 14:27:09: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3141s; samplesPerSecond = 2037.8 -MPI Rank 2: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3145s; samplesPerSecond = 2035.1 -MPI Rank 2: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3126s; samplesPerSecond = 2047.1 -MPI Rank 2: 05/03/2016 14:27:10: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3140s; samplesPerSecond = 2038.2 -MPI Rank 2: 05/03/2016 14:27:11: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3133s; samplesPerSecond = 2042.6 -MPI Rank 2: 05/03/2016 14:27:11: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.2923s; samplesPerSecond = 2189.7 -MPI Rank 2: 05/03/2016 14:27:11: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3126s; samplesPerSecond = 2047.2 -MPI Rank 2: 05/03/2016 14:27:12: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3029s; samplesPerSecond = 2112.9 -MPI Rank 2: 05/03/2016 14:27:12: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3139s; samplesPerSecond = 2039.2 -MPI Rank 2: 05/03/2016 14:27:12: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3134s; samplesPerSecond = 2042.2 -MPI Rank 2: 05/03/2016 14:27:13: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3133s; samplesPerSecond = 2042.6 -MPI Rank 2: 05/03/2016 14:27:13: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3126s; samplesPerSecond = 2047.1 -MPI Rank 2: 05/03/2016 14:27:13: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3132s; samplesPerSecond = 2043.2 -MPI Rank 2: 05/03/2016 14:27:13: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3138s; samplesPerSecond = 2039.7 -MPI Rank 2: 05/03/2016 14:27:14: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3140s; samplesPerSecond = 2038.4 -MPI Rank 2: 05/03/2016 14:27:14: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3134s; samplesPerSecond = 2042.2 -MPI Rank 2: 05/03/2016 14:27:14: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3141s; samplesPerSecond = 2037.4 -MPI Rank 2: 05/03/2016 14:27:15: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3127s; samplesPerSecond = 2046.5 -MPI Rank 2: 05/03/2016 14:27:15: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3134s; samplesPerSecond = 2042.3 -MPI Rank 2: 05/03/2016 14:27:15: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.1813s +MPI Rank 2: 05/03/2016 14:23:31: Starting minibatch loop. 
+MPI Rank 2: 05/03/2016 14:23:32: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.52102408 * 640; EvalErrorPrediction = 0.92656250 * 640; time = 0.3072s; samplesPerSecond = 2083.7 +MPI Rank 2: 05/03/2016 14:23:32: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.21764659 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.3123s; samplesPerSecond = 2049.2 +MPI Rank 2: 05/03/2016 14:23:32: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.92251861 * 640; EvalErrorPrediction = 0.85000000 * 640; time = 0.3139s; samplesPerSecond = 2038.8 +MPI Rank 2: 05/03/2016 14:23:33: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.91289446 * 640; EvalErrorPrediction = 0.88750000 * 640; time = 0.3138s; samplesPerSecond = 2039.3 +MPI Rank 2: 05/03/2016 14:23:33: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84057836 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.3135s; samplesPerSecond = 2041.4 +MPI Rank 2: 05/03/2016 14:23:33: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71077800 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.3125s; samplesPerSecond = 2048.2 +MPI Rank 2: 05/03/2016 14:23:34: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.50986627 * 640; EvalErrorPrediction = 0.81718750 * 640; time = 0.3139s; samplesPerSecond = 2038.6 +MPI Rank 2: 05/03/2016 14:23:34: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.47993705 * 640; EvalErrorPrediction = 0.81250000 * 640; time = 0.3144s; samplesPerSecond = 2035.9 +MPI Rank 2: 05/03/2016 14:23:34: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33550558 * 640; EvalErrorPrediction = 0.76718750 * 640; time = 0.3331s; samplesPerSecond = 1921.5 +MPI Rank 2: 05/03/2016 14:23:35: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.49726054 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3242s; samplesPerSecond = 1974.3 +MPI Rank 2: 05/03/2016 14:23:35: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21905375 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.3133s; samplesPerSecond = 2043.0 +MPI Rank 2: 05/03/2016 14:23:35: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31461145 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.3120s; samplesPerSecond = 2051.4 +MPI Rank 2: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.15950802 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.3139s; samplesPerSecond = 2038.7 +MPI Rank 2: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07762131 * 640; EvalErrorPrediction = 0.77187500 * 640; time = 0.3118s; samplesPerSecond = 2052.6 +MPI Rank 2: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05637351 * 640; EvalErrorPrediction = 0.72187500 * 640; time = 0.3021s; samplesPerSecond = 2118.7 +MPI Rank 2: 05/03/2016 14:23:36: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91153531 * 640; EvalErrorPrediction = 0.69062500 * 640; time = 0.3128s; samplesPerSecond = 2045.9 +MPI Rank 2: 05/03/2016 14:23:37: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.89745725 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.3140s; samplesPerSecond = 2038.1 +MPI Rank 2: 05/03/2016 14:23:37: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: 
CrossEntropyWithSoftmax = 2.72829961 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.3133s; samplesPerSecond = 2042.5 +MPI Rank 2: 05/03/2016 14:23:37: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.65806444 * 640; EvalErrorPrediction = 0.68593750 * 640; time = 0.3129s; samplesPerSecond = 2045.6 +MPI Rank 2: 05/03/2016 14:23:38: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.66604147 * 640; EvalErrorPrediction = 0.66093750 * 640; time = 0.3126s; samplesPerSecond = 2047.4 +MPI Rank 2: 05/03/2016 14:23:38: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53915697 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.3124s; samplesPerSecond = 2048.8 +MPI Rank 2: 05/03/2016 14:23:38: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61937093 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.3136s; samplesPerSecond = 2040.6 +MPI Rank 2: 05/03/2016 14:23:39: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51539473 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.3133s; samplesPerSecond = 2042.8 +MPI Rank 2: 05/03/2016 14:23:39: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47301309 * 640; EvalErrorPrediction = 0.64218750 * 640; time = 0.3130s; samplesPerSecond = 2044.8 +MPI Rank 2: 05/03/2016 14:23:39: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.42748799 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.3131s; samplesPerSecond = 2044.0 +MPI Rank 2: 05/03/2016 14:23:40: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.42204482 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.3131s; samplesPerSecond = 2044.0 +MPI Rank 2: 05/03/2016 14:23:40: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17342812 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.3129s; samplesPerSecond = 2045.3 +MPI Rank 2: 05/03/2016 14:23:40: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31290374 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.3124s; samplesPerSecond = 2048.4 +MPI Rank 2: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26008782 * 640; EvalErrorPrediction = 0.60312500 * 640; time = 0.3139s; samplesPerSecond = 2038.7 +MPI Rank 2: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15763314 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.3023s; samplesPerSecond = 2117.2 +MPI Rank 2: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.23496000 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.3131s; samplesPerSecond = 2043.8 +MPI Rank 2: 05/03/2016 14:23:41: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25712791 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.3128s; samplesPerSecond = 2046.0 +MPI Rank 2: 05/03/2016 14:23:41: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.00091203 * 20480; EvalErrorPrediction = 0.72744141 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=10.0359s MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:15: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +MPI Rank 2: 05/03/2016 14:23:42: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 
607.5 samples +MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:15: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.06783 -MPI Rank 2: Async gradient aggregation wait time: 0.01546 -MPI Rank 2: Actual gradient aggregation time: 0.05978 -MPI Rank 2: 05/03/2016 14:27:16: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.12914686 * 2304; EvalErrorPrediction = 0.57855903 * 2304; time = 0.4834s; samplesPerSecond = 4766.2 -MPI Rank 2: Async gradient aggregation wait time: 9.4e-005 -MPI Rank 2: Actual gradient aggregation time: 0.067411 -MPI Rank 2: Async gradient aggregation wait time: 0.034796 -MPI Rank 2: Actual gradient aggregation time: 0.050243 -MPI Rank 2: 05/03/2016 14:27:16: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11816271 * 2560; EvalErrorPrediction = 0.59179688 * 2560; time = 0.5220s; samplesPerSecond = 4904.2 -MPI Rank 2: Async gradient aggregation wait time: 0.005421 -MPI Rank 2: Actual gradient aggregation time: 0.050163 -MPI Rank 2: Async gradient aggregation wait time: 0.032713 -MPI Rank 2: Actual gradient aggregation time: 0.050114 -MPI Rank 2: 05/03/2016 14:27:17: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18355731 * 2560; EvalErrorPrediction = 0.58359375 * 2560; time = 0.5015s; samplesPerSecond = 5104.7 -MPI Rank 2: Async gradient aggregation wait time: 0.006759 -MPI Rank 2: Actual gradient aggregation time: 0.051325 -MPI Rank 2: Async gradient aggregation wait time: 0.007732 -MPI Rank 2: Actual gradient aggregation time: 0.050678 -MPI Rank 2: 05/03/2016 14:27:17: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10557335 * 2560; EvalErrorPrediction = 0.57812500 * 2560; time = 0.5186s; samplesPerSecond = 4936.0 -MPI Rank 2: Async gradient aggregation wait time: 0.007587 -MPI Rank 2: Actual gradient aggregation time: 0.050527 -MPI Rank 2: Async gradient aggregation wait time: 0.007708 -MPI Rank 2: Actual gradient aggregation time: 0.049596 -MPI Rank 2: 05/03/2016 14:27:18: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01391880 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.5022s; samplesPerSecond = 5097.5 -MPI Rank 2: Async gradient aggregation wait time: 0.007746 -MPI Rank 2: Actual gradient aggregation time: 0.039907 -MPI Rank 2: Async gradient aggregation wait time: 0.034156 -MPI Rank 2: Actual gradient aggregation time: 0.008888 -MPI Rank 2: 05/03/2016 14:27:18: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.05221236 * 2560; EvalErrorPrediction = 0.57148438 * 2560; time = 0.4998s; samplesPerSecond = 5121.8 -MPI Rank 2: Async gradient aggregation wait time: 0.007755 -MPI Rank 2: Actual gradient aggregation time: 0.049251 -MPI Rank 2: Async gradient aggregation wait time: 0.007717 -MPI Rank 2: Actual gradient aggregation time: 0.050623 -MPI Rank 2: 05/03/2016 14:27:19: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.11613999 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.5037s; samplesPerSecond = 5082.3 -MPI Rank 2: Async gradient aggregation wait time: 0.033766 -MPI Rank 2: Actual gradient aggregation time: 0.051638 -MPI Rank 2: Async gradient aggregation wait time: 0.035712 -MPI Rank 
2: Actual gradient aggregation time: 0.050839 -MPI Rank 2: 05/03/2016 14:27:19: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06152980 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5104s; samplesPerSecond = 5015.9 -MPI Rank 2: Async gradient aggregation wait time: 0.010264 -MPI Rank 2: Actual gradient aggregation time: 0.012721 -MPI Rank 2: 05/03/2016 14:27:19: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.09738685 * 20480; EvalErrorPrediction = 0.57431641 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.07195s +MPI Rank 2: 05/03/2016 14:23:42: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: Actual gradient aggregation time: 0.041505 +MPI Rank 2: Async gradient aggregation wait time: 0.009861 +MPI Rank 2: Actual gradient aggregation time: 0.047081 +MPI Rank 2: 05/03/2016 14:23:42: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.12914686 * 2304; EvalErrorPrediction = 0.57855903 * 2304; time = 0.4685s; samplesPerSecond = 4917.4 +MPI Rank 2: Async gradient aggregation wait time: 1e-006 +MPI Rank 2: Actual gradient aggregation time: 0.016583 +MPI Rank 2: Async gradient aggregation wait time: 0.028586 +MPI Rank 2: Actual gradient aggregation time: 0.049296 +MPI Rank 2: 05/03/2016 14:23:43: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11816271 * 2560; EvalErrorPrediction = 0.59179688 * 2560; time = 0.4969s; samplesPerSecond = 5151.5 +MPI Rank 2: Async gradient aggregation wait time: 0.020856 +MPI Rank 2: Actual gradient aggregation time: 0.049541 +MPI Rank 2: Async gradient aggregation wait time: 0.022505 +MPI Rank 2: Actual gradient aggregation time: 0.048481 +MPI Rank 2: 05/03/2016 14:23:43: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.18355731 * 2560; EvalErrorPrediction = 0.58359375 * 2560; time = 0.4940s; samplesPerSecond = 5182.5 +MPI Rank 2: Async gradient aggregation wait time: 0.018034 +MPI Rank 2: Actual gradient aggregation time: 0.05137 +MPI Rank 2: Async gradient aggregation wait time: 0.021858 +MPI Rank 2: Actual gradient aggregation time: 0.048437 +MPI Rank 2: 05/03/2016 14:23:44: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.10557335 * 2560; EvalErrorPrediction = 0.57812500 * 2560; time = 0.5035s; samplesPerSecond = 5084.1 +MPI Rank 2: Async gradient aggregation wait time: 0.021824 +MPI Rank 2: Actual gradient aggregation time: 0.0487 +MPI Rank 2: Async gradient aggregation wait time: 0.018855 +MPI Rank 2: Actual gradient aggregation time: 0.048501 +MPI Rank 2: 05/03/2016 14:23:44: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.01391880 * 2560; EvalErrorPrediction = 0.55312500 * 2560; time = 0.4964s; samplesPerSecond = 5157.2 +MPI Rank 2: Async gradient aggregation wait time: 0.020072 +MPI Rank 2: Actual gradient aggregation time: 0.04792 +MPI Rank 2: Async gradient aggregation wait time: 0.018395 +MPI Rank 2: Actual gradient aggregation time: 0.049381 +MPI Rank 2: 05/03/2016 14:23:44: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.05221236 * 2560; EvalErrorPrediction = 0.57148438 * 2560; time = 0.4927s; samplesPerSecond = 5195.6 +MPI Rank 2: Async gradient aggregation wait time: 0.020174 +MPI Rank 2: Actual gradient aggregation time: 0.046712 +MPI Rank 2: Async gradient aggregation wait time: 0.026234 +MPI 
Rank 2: Actual gradient aggregation time: 0.048632 +MPI Rank 2: 05/03/2016 14:23:45: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.11613999 * 2560; EvalErrorPrediction = 0.56562500 * 2560; time = 0.4971s; samplesPerSecond = 5149.9 +MPI Rank 2: Async gradient aggregation wait time: 0.018806 +MPI Rank 2: Actual gradient aggregation time: 0.050871 +MPI Rank 2: Async gradient aggregation wait time: 0.024346 +MPI Rank 2: Actual gradient aggregation time: 0.048397 +MPI Rank 2: 05/03/2016 14:23:46: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.06152980 * 2560; EvalErrorPrediction = 0.57421875 * 2560; time = 0.5120s; samplesPerSecond = 4999.9 +MPI Rank 2: Async gradient aggregation wait time: 0.009338 +MPI Rank 2: Actual gradient aggregation time: 0.012827 +MPI Rank 2: 05/03/2016 14:23:46: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.09738685 * 20480; EvalErrorPrediction = 0.57431641 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=3.99965s MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:23:46: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:19: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.116237 -MPI Rank 2: Actual gradient aggregation time: 0.180867 -MPI Rank 2: Async gradient aggregation wait time: 2e-006 -MPI Rank 2: Actual gradient aggregation time: 0.039281 -MPI Rank 2: 05/03/2016 14:27:21: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04847812 * 9216; EvalErrorPrediction = 0.54014757 * 9216; time = 1.5160s; samplesPerSecond = 6079.0 -MPI Rank 2: Async gradient aggregation wait time: 0.111877 -MPI Rank 2: Actual gradient aggregation time: 0.297592 -MPI Rank 2: Async gradient aggregation wait time: 0.003947 -MPI Rank 2: Actual gradient aggregation time: 0.171555 -MPI Rank 2: 05/03/2016 14:27:23: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94481165 * 10240; EvalErrorPrediction = 0.52617187 * 10240; time = 1.6712s; samplesPerSecond = 6127.3 -MPI Rank 2: 05/03/2016 14:27:23: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98968082 * 20480; EvalErrorPrediction = 0.53188477 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.21622s +MPI Rank 2: 05/03/2016 14:23:46: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 2: Async gradient aggregation wait time: 0.108117 +MPI Rank 2: Actual gradient aggregation time: 0.133336 +MPI Rank 2: Async gradient aggregation wait time: 1e-006 +MPI Rank 2: Actual gradient aggregation time: 0.040659 +MPI Rank 2: 05/03/2016 14:23:47: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.04847812 * 9216; EvalErrorPrediction = 0.54014757 * 9216; time = 1.5734s; samplesPerSecond = 5857.3 +MPI Rank 2: Async gradient aggregation wait time: 0.109327 +MPI Rank 2: Actual gradient aggregation time: 0.151951 +MPI Rank 2: Async gradient aggregation wait time: 0.005264 +MPI Rank 2: Actual gradient aggregation time: 0.149863 +MPI Rank 2: 05/03/2016 14:23:49: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.94481165 * 10240; EvalErrorPrediction = 0.52617187 * 10240; time = 1.6808s; samplesPerSecond = 6092.4 +MPI Rank 2: 05/03/2016 14:23:49: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 1.98968082 * 20480; EvalErrorPrediction = 0.53188477 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=3.2822s MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:23: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: 05/03/2016 14:23:49: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:23: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.00251 -MPI Rank 2: Actual gradient aggregation time: 0.2598 -MPI Rank 2: Async gradient aggregation wait time: 0.107855 -MPI Rank 2: Actual gradient aggregation time: 0.114645 -MPI Rank 2: 05/03/2016 14:27:24: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89065735 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 1.5376s; samplesPerSecond = 5993.6 -MPI Rank 2: Async gradient aggregation wait time: 0.005391 -MPI Rank 2: Actual gradient aggregation time: 0.149474 -MPI Rank 2: Async gradient aggregation wait time: 0.008367 -MPI Rank 2: Actual gradient aggregation time: 0.149854 -MPI Rank 2: 05/03/2016 14:27:26: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87529986 * 10240; EvalErrorPrediction = 0.51201172 * 10240; time = 1.5020s; samplesPerSecond = 6817.5 -MPI Rank 2: Async gradient aggregation wait time: 0.013815 -MPI Rank 2: 05/03/2016 14:27:26: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88285599 * 20480; EvalErrorPrediction = 0.51445312 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.06708s -MPI Rank 2: 05/03/2016 14:27:26: CNTKCommandTrainEnd: speechTrain +MPI Rank 2: 05/03/2016 14:23:49: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. 
+MPI Rank 2: Async gradient aggregation wait time: 0.001673 +MPI Rank 2: Actual gradient aggregation time: 0.109261 +MPI Rank 2: Async gradient aggregation wait time: 0.087625 +MPI Rank 2: Actual gradient aggregation time: 0.117858 +MPI Rank 2: 05/03/2016 14:23:50: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89065735 * 9216; EvalErrorPrediction = 0.51736111 * 9216; time = 1.4654s; samplesPerSecond = 6289.0 +MPI Rank 2: Async gradient aggregation wait time: 0.007458 +MPI Rank 2: Actual gradient aggregation time: 0.203664 +MPI Rank 2: Async gradient aggregation wait time: 0.110939 +MPI Rank 2: Actual gradient aggregation time: 0.115146 +MPI Rank 2: 05/03/2016 14:23:52: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87529986 * 10240; EvalErrorPrediction = 0.51201172 * 10240; time = 1.5904s; samplesPerSecond = 6438.6 +MPI Rank 2: Async gradient aggregation wait time: 0.013376 +MPI Rank 2: 05/03/2016 14:23:52: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.88285599 * 20480; EvalErrorPrediction = 0.51445312 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=3.1024s +MPI Rank 2: 05/03/2016 14:23:52: CNTKCommandTrainEnd: speechTrain MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:26: Action "train" complete. +MPI Rank 2: 05/03/2016 14:23:52: Action "train" complete. MPI Rank 2: -MPI Rank 2: 05/03/2016 14:27:26: __COMPLETED__ \ No newline at end of file +MPI Rank 2: 05/03/2016 14:23:52: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/run-test b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/run-test index 3d7d8c5d4..c0f09600c 100755 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/run-test +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/run-test @@ -8,6 +8,12 @@ LogFileName=stderr Instances=3 NumCPUThreads=$(threadsPerInstance $Instances) +(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) +if [ $? != 0 ]; then + echo Error: Baselines must match original test. Copy from $OriginalTestDir. + exit 1 +fi + # cntkmpirun cntkmpirun "-n $Instances" cntk.cntk "speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] speechTrain=[reader=[prefetch=true]] numCPUThreads=$NumCPUThreads precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]]" ExitCode=$? diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/LSTM/Truncated/run-test b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/LSTM/Truncated/run-test index 4f91b25c0..cfbac6391 100755 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/LSTM/Truncated/run-test +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/LSTM/Truncated/run-test @@ -7,7 +7,7 @@ ConfigDir=$TEST_DIR/../../../LSTM (cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) if [ $? != 0 ]; then - echo Error: Baselines must match original test. + echo Error: Baselines must match original test. 
Copy from $OriginalTestDir. exit 1 fi diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.cpu.txt index 0436d8dcd..2abec1eed 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.cpu.txt @@ -1,4 +1,4 @@ -=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true ------------------------------------------------------------------- Build info: @@ -17,32 +17,32 @@ Build info: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux ------------------------------------------------------------------- Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -05/03/2016 18:18:00: ------------------------------------------------------------------- -05/03/2016 18:18:00: Build info: +05/03/2016 18:18:38: ------------------------------------------------------------------- +05/03/2016 18:18:38: Build info: -05/03/2016 18:18:00: Built time: May 3 2016 17:56:15 -05/03/2016 18:18:00: Last modified date: Tue May 3 11:36:22 2016 -05/03/2016 18:18:00: Build type: release -05/03/2016 18:18:00: Build target: GPU -05/03/2016 18:18:00: With 1bit-SGD: no -05/03/2016 18:18:00: Math lib: acml -05/03/2016 18:18:00: CUDA_PATH: /usr/local/cuda-7.5 -05/03/2016 18:18:00: CUB_PATH: /usr/local/cub-1.4.1 -05/03/2016 18:18:00: CUDNN_PATH: /usr/local/cudnn-4.0 -05/03/2016 18:18:00: Build Branch: HEAD -05/03/2016 18:18:00: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -05/03/2016 18:18:00: Built by philly on 18750d26eb32 -05/03/2016 18:18:00: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -05/03/2016 18:18:00: ------------------------------------------------------------------- +05/03/2016 18:18:38: Built time: May 3 2016 17:56:15 +05/03/2016 18:18:38: Last modified date: Tue May 3 11:36:22 2016 +05/03/2016 18:18:38: Build type: release +05/03/2016 18:18:38: Build target: GPU +05/03/2016 18:18:38: With 1bit-SGD: 
no +05/03/2016 18:18:38: Math lib: acml +05/03/2016 18:18:38: CUDA_PATH: /usr/local/cuda-7.5 +05/03/2016 18:18:38: CUB_PATH: /usr/local/cub-1.4.1 +05/03/2016 18:18:38: CUDNN_PATH: /usr/local/cudnn-4.0 +05/03/2016 18:18:38: Build Branch: HEAD +05/03/2016 18:18:38: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +05/03/2016 18:18:38: Built by philly on 18750d26eb32 +05/03/2016 18:18:38: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +05/03/2016 18:18:38: ------------------------------------------------------------------- -05/03/2016 18:18:00: Running on localhost at 2016/05/03 18:18:00 -05/03/2016 18:18:00: Command line: -/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +05/03/2016 18:18:38: Running on localhost at 2016/05/03 18:18:38 +05/03/2016 18:18:38: Command line: +/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true -05/03/2016 18:18:00: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:00: precision = "float" +05/03/2016 18:18:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:38: precision = "float" command = speechTrain deviceId = $DeviceId$ parallelTrain = false @@ -128,25 +128,24 @@ speechTrain = [ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 18:18:00: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT 
RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:00: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:00: precision = "float" +05/03/2016 18:18:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:38: precision = "float" command = speechTrain deviceId = -1 parallelTrain = false makeMode = false speechTrain = [ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn" deviceId = -1 traceLevel = 1 SimpleNetworkBuilder = [ @@ -225,30 +224,29 @@ speechTrain = [ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 18:18:00: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:00: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:38: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain -configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E +configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:deviceId=-1 configparameters: cntk.cntk:makeMode=false -configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu configparameters: cntk.cntk:parallelTrain=false configparameters: cntk.cntk:precision=float -configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu configparameters: cntk.cntk:speechTrain=[ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn" + 
modelPath = "/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn" deviceId = -1 traceLevel = 1 SimpleNetworkBuilder = [ @@ -325,33 +323,32 @@ configparameters: cntk.cntk:speechTrain=[ labelType = "category" ] ] -] [reader=[readerType=ExperimentalHTKMLFReader]] +] configparameters: cntk.cntk:timestamping=true -05/03/2016 18:18:00: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:00: Commands: speechTrain -05/03/2016 18:18:00: Precision = "float" -05/03/2016 18:18:00: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn -05/03/2016 18:18:00: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 18:18:00: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +05/03/2016 18:18:38: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:38: Commands: speechTrain +05/03/2016 18:18:38: Precision = "float" +05/03/2016 18:18:38: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn +05/03/2016 18:18:38: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 18:18:38: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -05/03/2016 18:18:00: ############################################################################## -05/03/2016 18:18:00: # # -05/03/2016 18:18:00: # Action "train" # -05/03/2016 18:18:00: # # -05/03/2016 18:18:00: ############################################################################## +05/03/2016 18:18:38: ############################################################################## +05/03/2016 18:18:38: # # +05/03/2016 18:18:38: # Action "train" # +05/03/2016 18:18:38: # # +05/03/2016 18:18:38: ############################################################################## -05/03/2016 18:18:00: CNTKCommandTrainBegin: speechTrain +05/03/2016 18:18:38: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using CPU -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:18:00: Creating virgin network. +05/03/2016 18:18:38: Creating virgin network. Post-processing network... @@ -403,14 +400,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:18:00: Created model with 25 nodes on CPU. +05/03/2016 18:18:38: Created model with 25 nodes on CPU. 
-05/03/2016 18:18:00: Training criterion node(s): -05/03/2016 18:18:00: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 18:18:38: Training criterion node(s): +05/03/2016 18:18:38: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 18:18:00: Evaluation criterion node(s): +05/03/2016 18:18:38: Evaluation criterion node(s): -05/03/2016 18:18:00: EvalErrorPrediction = ErrorPrediction +05/03/2016 18:18:38: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -418,112 +415,117 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -0xfa4f78: {[W2 Value[132 x 512]] } -0xfb95f8: {[CrossEntropyWithSoftmax Value[1]] } -0xfb9878: {[W0*features Value[512 x *]] } -0xfc8998: {[B2 Value[132 x 1]] } -0xfe3868: {[MVNormalizedFeatures Value[363 x *]] } -0xfe39a8: {[LogOfPrior Value[132]] } -0x1044b38: {[B1 Value[512 x 1]] } -0x104ac18: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -0x104add8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -0x106bae8: {[InvStdOfFeatures Value[363]] } -0x1072f98: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -0x108b8a8: {[labels Value[132 x *]] } -0x108ff18: {[W1 Value[512 x 512]] } -0x1095e98: {[W0 Value[512 x 363]] } -0x10bbc58: {[features Value[363 x *]] } -0x10c1fd8: {[B0 Value[512 x 1]] } -0x10cdf28: {[EvalErrorPrediction Value[1]] } -0x10ce0e8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -0x10cea18: {[MeanOfFeatures Value[363]] } -0x10d8788: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -0x10d8948: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -0x10d8b08: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -0x10db288: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -0x10db628: {[CrossEntropyWithSoftmax Gradient[1]] } -0x10db7e8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -0x10db9a8: {[W2*H1 Gradient[132 x 1 x *]] } -0x10dbb68: {[B2 Gradient[132 x 1]] } -0x10dbe08: {[Prior Value[132]] } +0x1381e08: {[W2 Value[132 x 512]] } +0x1381f08: {[B2 Value[132 x 1]] } +0x1385b78: {[InvStdOfFeatures Value[363]] } +0x13866a8: {[MeanOfFeatures Value[363]] } +0x1386858: {[W1 Value[512 x 512]] } +0x138a4f8: {[B1 Value[512 x 1]] } +0x138d288: {[features Value[363 x *]] } +0x13da588: {[MVNormalizedFeatures Value[363 x *]] } +0x13dee18: {[EvalErrorPrediction Value[1]] } +0x13e1868: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +0x13e1a28: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +0x13e1be8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +0x13e1da8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +0x1426628: {[W0 Value[512 x 363]] } +0x1454828: {[B0 Value[512 x 1]] } +0x148f0c8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +0x148f2d8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +0x148f498: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +0x14a5828: {[W2*H1 Gradient[132 x 1 x *]] } 
+0x14a59e8: {[B2 Gradient[132 x 1]] } +0x14a7318: {[CrossEntropyWithSoftmax Gradient[1]] } +0x14a74d8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +0x14a8ac8: {[ScaledLogLikelihood Value[132 x 1 x *]] } +0x14a8c88: {[CrossEntropyWithSoftmax Value[1]] } +0x14ae858: {[W0*features Value[512 x *]] } +0x14ae958: {[LogOfPrior Value[132]] } +0x14af3e8: {[labels Value[132 x *]] } +0x14afd28: {[Prior Value[132]] } -05/03/2016 18:18:00: Precomputing --> 3 PreCompute nodes found. +05/03/2016 18:18:38: Precomputing --> 3 PreCompute nodes found. -05/03/2016 18:18:00: MeanOfFeatures = Mean() -05/03/2016 18:18:00: InvStdOfFeatures = InvStdDev() -05/03/2016 18:18:00: Prior = Mean() +05/03/2016 18:18:38: MeanOfFeatures = Mean() +05/03/2016 18:18:38: InvStdOfFeatures = InvStdDev() +05/03/2016 18:18:38: Prior = Mean() +minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:18:04: Precomputing --> Completed. +05/03/2016 18:18:39: Precomputing --> Completed. -05/03/2016 18:18:04: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +05/03/2016 18:18:39: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:04: Starting minibatch loop. -05/03/2016 18:18:04: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.39181900 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.2546s; samplesPerSecond = 2513.5 -05/03/2016 18:18:04: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.16675568 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.1455s; samplesPerSecond = 4397.5 -05/03/2016 18:18:04: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98684082 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0788s; samplesPerSecond = 8117.2 -05/03/2016 18:18:04: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86595383 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1143s; samplesPerSecond = 5598.0 -05/03/2016 18:18:05: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.81007080 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.1319s; samplesPerSecond = 4852.8 -05/03/2016 18:18:05: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73428192 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1725s; samplesPerSecond = 3710.1 -05/03/2016 18:18:05: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57475586 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0770s; samplesPerSecond = 8315.4 -05/03/2016 18:18:05: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.43591919 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0918s; samplesPerSecond = 6971.2 -05/03/2016 18:18:05: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36042786 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.1240s; samplesPerSecond = 5162.2 -05/03/2016 18:18:05: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39383850 * 640; EvalErrorPrediction = 0.85156250 * 640; time = 0.0852s; samplesPerSecond = 7509.6 -05/03/2016 18:18:05: Epoch[ 1 of 
3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25078430 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.0958s; samplesPerSecond = 6680.3 -05/03/2016 18:18:05: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35325317 * 640; EvalErrorPrediction = 0.79375000 * 640; time = 0.0866s; samplesPerSecond = 7387.0 -05/03/2016 18:18:05: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19606934 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.1067s; samplesPerSecond = 5996.0 -05/03/2016 18:18:06: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06110535 * 640; EvalErrorPrediction = 0.73125000 * 640; time = 0.1282s; samplesPerSecond = 4991.7 -05/03/2016 18:18:06: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05118713 * 640; EvalErrorPrediction = 0.75625000 * 640; time = 0.1014s; samplesPerSecond = 6310.2 -05/03/2016 18:18:06: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02474365 * 640; EvalErrorPrediction = 0.74062500 * 640; time = 0.1247s; samplesPerSecond = 5133.2 -05/03/2016 18:18:06: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89902954 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.0935s; samplesPerSecond = 6841.8 -05/03/2016 18:18:06: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.75173340 * 640; EvalErrorPrediction = 0.68125000 * 640; time = 0.0905s; samplesPerSecond = 7070.2 -05/03/2016 18:18:06: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83969116 * 640; EvalErrorPrediction = 0.71875000 * 640; time = 0.0954s; samplesPerSecond = 6711.4 -05/03/2016 18:18:06: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62870483 * 640; EvalErrorPrediction = 0.65468750 * 640; time = 0.1093s; samplesPerSecond = 5854.5 -05/03/2016 18:18:06: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.66655273 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.1071s; samplesPerSecond = 5978.4 -05/03/2016 18:18:06: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61327515 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.1147s; samplesPerSecond = 5579.3 -05/03/2016 18:18:06: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53099976 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.1295s; samplesPerSecond = 4942.8 -05/03/2016 18:18:07: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.43747559 * 640; EvalErrorPrediction = 0.64375000 * 640; time = 0.1067s; samplesPerSecond = 5996.5 -05/03/2016 18:18:07: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.41107178 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0903s; samplesPerSecond = 7087.1 -05/03/2016 18:18:07: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48898926 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.3125s; samplesPerSecond = 2048.2 -05/03/2016 18:18:07: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34965820 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.1393s; samplesPerSecond = 4593.6 -05/03/2016 18:18:07: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.23708496 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.1166s; samplesPerSecond = 5490.3 -05/03/2016 18:18:07: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.33135376 * 640; 
EvalErrorPrediction = 0.62031250 * 640; time = 0.0950s; samplesPerSecond = 6737.5 -05/03/2016 18:18:07: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21607666 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.1283s; samplesPerSecond = 4987.1 -05/03/2016 18:18:08: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29110107 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.1013s; samplesPerSecond = 6316.5 -05/03/2016 18:18:08: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.20535278 * 640; EvalErrorPrediction = 0.57500000 * 640; time = 0.1259s; samplesPerSecond = 5082.8 -05/03/2016 18:18:08: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01737366 * 20480; EvalErrorPrediction = 0.73061523 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.94389s -05/03/2016 18:18:08: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn.1' +05/03/2016 18:18:39: Starting minibatch loop. +05/03/2016 18:18:39: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.39181900 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0735s; samplesPerSecond = 8707.4 +05/03/2016 18:18:39: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.16675568 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.1005s; samplesPerSecond = 6370.4 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98684082 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0829s; samplesPerSecond = 7720.1 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86595383 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0518s; samplesPerSecond = 12362.8 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.81007080 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0514s; samplesPerSecond = 12451.8 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73428192 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0516s; samplesPerSecond = 12413.4 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57475586 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0511s; samplesPerSecond = 12525.7 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.43591919 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0514s; samplesPerSecond = 12439.5 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36042786 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.0513s; samplesPerSecond = 12483.2 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39383850 * 640; EvalErrorPrediction = 0.85156250 * 640; time = 0.0513s; samplesPerSecond = 12470.5 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25078430 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.0523s; samplesPerSecond = 12242.2 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35325317 * 640; EvalErrorPrediction = 0.79375000 * 640; time = 0.0530s; samplesPerSecond = 12069.1 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19606934 * 640; EvalErrorPrediction = 0.76875000 * 640; 
time = 0.0519s; samplesPerSecond = 12332.4 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06110535 * 640; EvalErrorPrediction = 0.73125000 * 640; time = 0.0513s; samplesPerSecond = 12480.0 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05118713 * 640; EvalErrorPrediction = 0.75625000 * 640; time = 0.0876s; samplesPerSecond = 7307.6 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02474365 * 640; EvalErrorPrediction = 0.74062500 * 640; time = 0.0515s; samplesPerSecond = 12429.4 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89902954 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.0516s; samplesPerSecond = 12405.5 +05/03/2016 18:18:40: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.75173340 * 640; EvalErrorPrediction = 0.68125000 * 640; time = 0.0516s; samplesPerSecond = 12410.3 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83969116 * 640; EvalErrorPrediction = 0.71875000 * 640; time = 0.1163s; samplesPerSecond = 5501.6 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62870483 * 640; EvalErrorPrediction = 0.65468750 * 640; time = 0.0649s; samplesPerSecond = 9868.8 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.66655273 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.0511s; samplesPerSecond = 12520.3 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61327515 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.0511s; samplesPerSecond = 12526.2 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53099976 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0513s; samplesPerSecond = 12468.6 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.43747559 * 640; EvalErrorPrediction = 0.64375000 * 640; time = 0.0547s; samplesPerSecond = 11694.2 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.41107178 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0513s; samplesPerSecond = 12469.3 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48898926 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0537s; samplesPerSecond = 11919.0 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34965820 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0514s; samplesPerSecond = 12450.2 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.23708496 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.0511s; samplesPerSecond = 12524.0 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.33135376 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0514s; samplesPerSecond = 12443.9 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21607666 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0565s; samplesPerSecond = 11332.0 +05/03/2016 18:18:41: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29110107 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.0531s; samplesPerSecond = 12062.3 +05/03/2016 18:18:41: 
Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.20535278 * 640; EvalErrorPrediction = 0.57500000 * 640; time = 0.0519s; samplesPerSecond = 12326.9 +05/03/2016 18:18:41: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01737366 * 20480; EvalErrorPrediction = 0.73061523 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=1.88594s +05/03/2016 18:18:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn.1' -05/03/2016 18:18:08: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +05/03/2016 18:18:41: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:08: Starting minibatch loop. -05/03/2016 18:18:08: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.05711155 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 0.1986s; samplesPerSecond = 12889.8 -05/03/2016 18:18:08: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02925396 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 0.2552s; samplesPerSecond = 10031.0 -05/03/2016 18:18:08: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02826538 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.2181s; samplesPerSecond = 11736.7 -05/03/2016 18:18:09: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.97095795 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1814s; samplesPerSecond = 14113.3 -05/03/2016 18:18:09: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.94550018 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 0.2214s; samplesPerSecond = 11565.2 -05/03/2016 18:18:09: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.01561737 * 2560; EvalErrorPrediction = 0.54414063 * 2560; time = 0.3224s; samplesPerSecond = 7940.8 -05/03/2016 18:18:09: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94069901 * 2560; EvalErrorPrediction = 0.52500000 * 2560; time = 0.2357s; samplesPerSecond = 10862.4 -05/03/2016 18:18:10: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.94857330 * 2560; EvalErrorPrediction = 0.54023438 * 2560; time = 0.2088s; samplesPerSecond = 12260.2 -05/03/2016 18:18:10: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.99199734 * 20480; EvalErrorPrediction = 0.54179687 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.84574s -05/03/2016 18:18:10: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn.2' +05/03/2016 18:18:41: Starting minibatch loop. 
+05/03/2016 18:18:41: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.05711155 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 0.1738s; samplesPerSecond = 14733.1 +05/03/2016 18:18:42: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02925396 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 0.1667s; samplesPerSecond = 15355.6 +05/03/2016 18:18:42: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02826538 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.1568s; samplesPerSecond = 16327.2 +05/03/2016 18:18:42: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.97095795 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.2171s; samplesPerSecond = 11791.3 +05/03/2016 18:18:42: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.94550018 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 0.1636s; samplesPerSecond = 15644.3 +05/03/2016 18:18:42: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.01561737 * 2560; EvalErrorPrediction = 0.54414063 * 2560; time = 0.1627s; samplesPerSecond = 15732.7 +05/03/2016 18:18:42: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94069901 * 2560; EvalErrorPrediction = 0.52500000 * 2560; time = 0.1746s; samplesPerSecond = 14665.6 +05/03/2016 18:18:43: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.94857330 * 2560; EvalErrorPrediction = 0.54023438 * 2560; time = 0.1637s; samplesPerSecond = 15635.9 +05/03/2016 18:18:43: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.99199734 * 20480; EvalErrorPrediction = 0.54179687 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.38363s +05/03/2016 18:18:43: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn.2' -05/03/2016 18:18:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 18:18:43: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:10: Starting minibatch loop. -05/03/2016 18:18:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946163 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 0.9372s; samplesPerSecond = 10926.7 -05/03/2016 18:18:11: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066799 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 0.5568s; samplesPerSecond = 18389.6 -05/03/2016 18:18:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506481 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=1.49933s -05/03/2016 18:18:11: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn' -05/03/2016 18:18:11: CNTKCommandTrainEnd: speechTrain +05/03/2016 18:18:43: Starting minibatch loop. 
+05/03/2016 18:18:43: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946163 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 0.5706s; samplesPerSecond = 17944.9 +05/03/2016 18:18:44: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066799 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 0.5679s; samplesPerSecond = 18031.6 +05/03/2016 18:18:44: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506481 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=1.14421s +05/03/2016 18:18:44: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn' +05/03/2016 18:18:44: CNTKCommandTrainEnd: speechTrain -05/03/2016 18:18:11: Action "train" complete. +05/03/2016 18:18:44: Action "train" complete. -05/03/2016 18:18:11: __COMPLETED__ +05/03/2016 18:18:44: __COMPLETED__ === Deleting last epoch data ==== Re-running from checkpoint -=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true ------------------------------------------------------------------- Build info: @@ -542,32 +544,32 @@ Build info: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux ------------------------------------------------------------------- Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -05/03/2016 18:18:11: ------------------------------------------------------------------- -05/03/2016 18:18:11: Build info: +05/03/2016 18:18:44: ------------------------------------------------------------------- +05/03/2016 18:18:44: Build info: -05/03/2016 18:18:11: Built time: May 3 2016 17:56:15 -05/03/2016 18:18:11: Last modified date: Tue May 3 11:36:22 2016 -05/03/2016 18:18:11: Build type: release -05/03/2016 18:18:11: Build target: GPU -05/03/2016 18:18:11: With 1bit-SGD: no -05/03/2016 18:18:11: Math lib: acml -05/03/2016 18:18:11: CUDA_PATH: /usr/local/cuda-7.5 -05/03/2016 18:18:11: CUB_PATH: 
/usr/local/cub-1.4.1 -05/03/2016 18:18:11: CUDNN_PATH: /usr/local/cudnn-4.0 -05/03/2016 18:18:11: Build Branch: HEAD -05/03/2016 18:18:11: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -05/03/2016 18:18:11: Built by philly on 18750d26eb32 -05/03/2016 18:18:11: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -05/03/2016 18:18:11: ------------------------------------------------------------------- +05/03/2016 18:18:44: Built time: May 3 2016 17:56:15 +05/03/2016 18:18:44: Last modified date: Tue May 3 11:36:22 2016 +05/03/2016 18:18:44: Build type: release +05/03/2016 18:18:44: Build target: GPU +05/03/2016 18:18:44: With 1bit-SGD: no +05/03/2016 18:18:44: Math lib: acml +05/03/2016 18:18:44: CUDA_PATH: /usr/local/cuda-7.5 +05/03/2016 18:18:44: CUB_PATH: /usr/local/cub-1.4.1 +05/03/2016 18:18:44: CUDNN_PATH: /usr/local/cudnn-4.0 +05/03/2016 18:18:44: Build Branch: HEAD +05/03/2016 18:18:44: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +05/03/2016 18:18:44: Built by philly on 18750d26eb32 +05/03/2016 18:18:44: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +05/03/2016 18:18:44: ------------------------------------------------------------------- -05/03/2016 18:18:11: Running on localhost at 2016/05/03 18:18:11 -05/03/2016 18:18:11: Command line: -/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +05/03/2016 18:18:44: Running on localhost at 2016/05/03 18:18:44 +05/03/2016 18:18:44: Command line: +/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true -05/03/2016 18:18:11: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:11: precision = "float" +05/03/2016 18:18:44: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:44: precision = "float" command = speechTrain deviceId = $DeviceId$ parallelTrain = false @@ -653,26 +655,25 @@ speechTrain = [ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu 
+RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 18:18:11: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:44: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:11: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:11: precision = "float" +05/03/2016 18:18:44: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:44: precision = "float" command = speechTrain deviceId = -1 parallelTrain = false makeMode = false speechTrain = [ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn" deviceId = -1 traceLevel = 1 SimpleNetworkBuilder = [ @@ -751,31 +752,30 @@ speechTrain = [ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 18:18:11: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:44: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:11: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:44: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain -configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E +configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:deviceId=-1 configparameters: 
cntk.cntk:makeMode=true -configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu configparameters: cntk.cntk:parallelTrain=false configparameters: cntk.cntk:precision=float -configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu +configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu configparameters: cntk.cntk:speechTrain=[ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn" deviceId = -1 traceLevel = 1 SimpleNetworkBuilder = [ @@ -852,33 +852,32 @@ configparameters: cntk.cntk:speechTrain=[ labelType = "category" ] ] -] [reader=[readerType=ExperimentalHTKMLFReader]] +] configparameters: cntk.cntk:timestamping=true -05/03/2016 18:18:11: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:11: Commands: speechTrain -05/03/2016 18:18:11: Precision = "float" -05/03/2016 18:18:11: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn -05/03/2016 18:18:11: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 18:18:11: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +05/03/2016 18:18:44: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:44: Commands: speechTrain +05/03/2016 18:18:44: Precision = "float" +05/03/2016 18:18:44: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn +05/03/2016 18:18:44: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 18:18:44: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -05/03/2016 18:18:11: ############################################################################## -05/03/2016 18:18:11: # # -05/03/2016 18:18:11: # Action "train" # -05/03/2016 18:18:11: # # -05/03/2016 18:18:11: ############################################################################## +05/03/2016 18:18:44: ############################################################################## +05/03/2016 18:18:44: # # +05/03/2016 18:18:44: # Action "train" # +05/03/2016 18:18:44: # # +05/03/2016 18:18:44: ############################################################################## -05/03/2016 18:18:11: CNTKCommandTrainBegin: speechTrain +05/03/2016 18:18:44: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using CPU -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... 
total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:18:11: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn.2'. +05/03/2016 18:18:44: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn.2'. Post-processing network... @@ -930,14 +929,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:18:11: Loaded model with 25 nodes on CPU. +05/03/2016 18:18:44: Loaded model with 25 nodes on CPU. -05/03/2016 18:18:11: Training criterion node(s): -05/03/2016 18:18:11: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 18:18:44: Training criterion node(s): +05/03/2016 18:18:44: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 18:18:11: Evaluation criterion node(s): +05/03/2016 18:18:44: Evaluation criterion node(s): -05/03/2016 18:18:11: EvalErrorPrediction = ErrorPrediction +05/03/2016 18:18:44: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -945,46 +944,48 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *1]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *1]] [PosteriorProb Value[132 x 1 x *1]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *1]] [features Gradient[363 x *1]] [labels Gradient[132 x *1]] } -0x1aeb028: {[features Value[363 x *1]] } -0x1aeefc8: {[W0 Value[512 x 363]] } -0x1af6028: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *1]] } -0x1af6178: {[W0*features Value[512 x *1]] } -0x1b44848: {[Prior Value[132]] } -0x1b45f68: {[InvStdOfFeatures Value[363]] } -0x1b47188: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *1]] [HLast Gradient[132 x 1 x *1]] } -0x1b47348: {[W2*H1 Gradient[132 x 1 x *1]] } -0x1b47508: {[B2 Gradient[132 x 1]] } -0x1bb5828: {[EvalErrorPrediction Value[1]] } -0x1bb88e8: {[B2 Value[132 x 1]] } -0x1bbf8c8: {[H1 Value[512 x 1 x *1]] [W0*features Gradient[512 x *1]] } -0x1bbfa88: {[W0*features+B0 Gradient[512 x 1 x *1]] [W1*H1 Value[512 x 1 x *1]] } -0x1bbfc48: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *1]] } -0x1bc48f8: {[W2 Value[132 x 512]] } -0x1bc4b58: {[H2 Value[512 x 1 x *1]] [W1*H1 Gradient[512 x 1 x *1]] } -0x1bc4cb8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *1]] [W1*H1+B1 Gradient[512 x 1 x *1]] [W2*H1 Value[132 x 1 x *1]] } -0x1bc4e78: {[HLast Value[132 x 1 x *1]] [W2 Gradient[132 x 512]] } -0x1bdcd98: {[B0 Value[512 x 1]] } -0x1be2ca8: {[W1 Value[512 x 512]] } -0x1bf94e8: {[B1 Value[512 x 1]] } -0x1c22658: {[MVNormalizedFeatures Value[363 x *1]] } -0x1c22708: {[LogOfPrior Value[132]] } -0x1c246c8: {[CrossEntropyWithSoftmax Gradient[1]] } -0x1c25598: {[ScaledLogLikelihood Value[132 x 1 x *1]] } -0x1c25758: {[CrossEntropyWithSoftmax Value[1]] } -0x1c28958: {[labels Value[132 x *1]] } -0x1c2ca78: {[MeanOfFeatures Value[363]] } 
+0x162aba8: {[B1 Value[512 x 1]] } +0x162b068: {[B0 Value[512 x 1]] } +0x162dc58: {[features Value[363 x *1]] } +0x162e6e8: {[B2 Value[132 x 1]] } +0x162e808: {[Prior Value[132]] } +0x1632798: {[InvStdOfFeatures Value[363]] } +0x1635a88: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *1]] } +0x1635c98: {[H1 Value[512 x 1 x *1]] [W0*features Gradient[512 x *1]] } +0x1635e58: {[W0*features+B0 Gradient[512 x 1 x *1]] [W1*H1 Value[512 x 1 x *1]] } +0x1636018: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *1]] } +0x16361d8: {[H2 Value[512 x 1 x *1]] [W1*H1 Gradient[512 x 1 x *1]] } +0x16820c8: {[EvalErrorPrediction Value[1]] } +0x16823b8: {[ScaledLogLikelihood Value[132 x 1 x *1]] } +0x1682578: {[CrossEntropyWithSoftmax Value[1]] } +0x1682848: {[W0*features Value[512 x *1]] } +0x1689ce8: {[MVNormalizedFeatures Value[363 x *1]] } +0x16ce748: {[MeanOfFeatures Value[363]] } +0x1733f58: {[CrossEntropyWithSoftmax Gradient[1]] } +0x1734118: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *1]] [HLast Gradient[132 x 1 x *1]] } +0x17342d8: {[W2*H1 Gradient[132 x 1 x *1]] } +0x1734498: {[B2 Gradient[132 x 1]] } +0x1742148: {[W0 Value[512 x 363]] } +0x1746838: {[labels Value[132 x *1]] } +0x174d948: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *1]] [W1*H1+B1 Gradient[512 x 1 x *1]] [W2*H1 Value[132 x 1 x *1]] } +0x174db08: {[HLast Value[132 x 1 x *1]] [W2 Gradient[132 x 512]] } +0x174ef58: {[LogOfPrior Value[132]] } +0x1753168: {[W1 Value[512 x 512]] } +0x1757cc8: {[W2 Value[132 x 512]] } -05/03/2016 18:18:11: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 18:18:44: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 18:18:11: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 18:18:44: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:18:11: Starting minibatch loop. -05/03/2016 18:18:12: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946163 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 0.9073s; samplesPerSecond = 11286.8 -05/03/2016 18:18:13: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066799 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 0.7119s; samplesPerSecond = 14383.7 -05/03/2016 18:18:13: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506481 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=1.65992s -05/03/2016 18:18:13: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn' -05/03/2016 18:18:13: CNTKCommandTrainEnd: speechTrain +05/03/2016 18:18:44: Starting minibatch loop. 
+05/03/2016 18:18:45: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946163 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 0.7391s; samplesPerSecond = 13854.3 +05/03/2016 18:18:45: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066799 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 0.5899s; samplesPerSecond = 17358.9 +05/03/2016 18:18:45: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506481 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=1.41708s +05/03/2016 18:18:45: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn' +05/03/2016 18:18:46: CNTKCommandTrainEnd: speechTrain -05/03/2016 18:18:13: Action "train" complete. +05/03/2016 18:18:46: Action "train" complete. -05/03/2016 18:18:13: __COMPLETED__ \ No newline at end of file +05/03/2016 18:18:46: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.gpu.txt index 00d201c08..e72ce00a1 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.gpu.txt @@ -1,4 +1,4 @@ -=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true ------------------------------------------------------------------- Build info: @@ -17,32 +17,32 @@ Build info: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux ------------------------------------------------------------------- Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -05/03/2016 18:18:13: ------------------------------------------------------------------- -05/03/2016 18:18:13: Build info: +05/03/2016 18:18:46: ------------------------------------------------------------------- +05/03/2016 18:18:46: Build info: -05/03/2016 18:18:13: 
Built time: May 3 2016 17:56:15 -05/03/2016 18:18:13: Last modified date: Tue May 3 11:36:22 2016 -05/03/2016 18:18:13: Build type: release -05/03/2016 18:18:13: Build target: GPU -05/03/2016 18:18:13: With 1bit-SGD: no -05/03/2016 18:18:13: Math lib: acml -05/03/2016 18:18:13: CUDA_PATH: /usr/local/cuda-7.5 -05/03/2016 18:18:13: CUB_PATH: /usr/local/cub-1.4.1 -05/03/2016 18:18:13: CUDNN_PATH: /usr/local/cudnn-4.0 -05/03/2016 18:18:13: Build Branch: HEAD -05/03/2016 18:18:13: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -05/03/2016 18:18:13: Built by philly on 18750d26eb32 -05/03/2016 18:18:13: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -05/03/2016 18:18:13: ------------------------------------------------------------------- +05/03/2016 18:18:46: Built time: May 3 2016 17:56:15 +05/03/2016 18:18:46: Last modified date: Tue May 3 11:36:22 2016 +05/03/2016 18:18:46: Build type: release +05/03/2016 18:18:46: Build target: GPU +05/03/2016 18:18:46: With 1bit-SGD: no +05/03/2016 18:18:46: Math lib: acml +05/03/2016 18:18:46: CUDA_PATH: /usr/local/cuda-7.5 +05/03/2016 18:18:46: CUB_PATH: /usr/local/cub-1.4.1 +05/03/2016 18:18:46: CUDNN_PATH: /usr/local/cudnn-4.0 +05/03/2016 18:18:46: Build Branch: HEAD +05/03/2016 18:18:46: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +05/03/2016 18:18:46: Built by philly on 18750d26eb32 +05/03/2016 18:18:46: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +05/03/2016 18:18:46: ------------------------------------------------------------------- -05/03/2016 18:18:13: Running on localhost at 2016/05/03 18:18:13 -05/03/2016 18:18:13: Command line: -/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +05/03/2016 18:18:46: Running on localhost at 2016/05/03 18:18:46 +05/03/2016 18:18:46: Command line: +/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true -05/03/2016 18:18:13: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:13: precision = "float" +05/03/2016 18:18:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:46: precision = "float" command = speechTrain deviceId = $DeviceId$ 
parallelTrain = false @@ -128,25 +128,24 @@ speechTrain = [ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 18:18:13: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:13: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:13: precision = "float" +05/03/2016 18:18:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:46: precision = "float" command = speechTrain deviceId = 0 parallelTrain = false makeMode = false speechTrain = [ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn" deviceId = 0 traceLevel = 1 SimpleNetworkBuilder = [ @@ -225,30 +224,29 @@ speechTrain = [ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 18:18:13: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:13: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:46: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain -configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E +configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E configparameters: 
cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:deviceId=0 configparameters: cntk.cntk:makeMode=false -configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu configparameters: cntk.cntk:parallelTrain=false configparameters: cntk.cntk:precision=float -configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu configparameters: cntk.cntk:speechTrain=[ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn" deviceId = 0 traceLevel = 1 SimpleNetworkBuilder = [ @@ -325,33 +323,32 @@ configparameters: cntk.cntk:speechTrain=[ labelType = "category" ] ] -] [reader=[readerType=ExperimentalHTKMLFReader]] +] configparameters: cntk.cntk:timestamping=true -05/03/2016 18:18:13: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:13: Commands: speechTrain -05/03/2016 18:18:13: Precision = "float" -05/03/2016 18:18:13: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn -05/03/2016 18:18:13: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 18:18:13: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +05/03/2016 18:18:46: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:46: Commands: speechTrain +05/03/2016 18:18:46: Precision = "float" +05/03/2016 18:18:46: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn +05/03/2016 18:18:46: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 18:18:46: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -05/03/2016 18:18:13: ############################################################################## -05/03/2016 18:18:13: # # -05/03/2016 18:18:13: # Action "train" # -05/03/2016 18:18:13: # # -05/03/2016 18:18:13: ############################################################################## +05/03/2016 18:18:46: ############################################################################## +05/03/2016 18:18:46: # # +05/03/2016 18:18:46: # Action "train" # +05/03/2016 18:18:46: # # +05/03/2016 18:18:46: ############################################################################## -05/03/2016 18:18:13: CNTKCommandTrainBegin: speechTrain +05/03/2016 18:18:46: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using GPU 0 -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 
948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:18:13: Creating virgin network. +05/03/2016 18:18:46: Creating virgin network. SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 Post-processing network... @@ -404,14 +401,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:18:13: Created model with 25 nodes on GPU 0. +05/03/2016 18:18:46: Created model with 25 nodes on GPU 0. -05/03/2016 18:18:13: Training criterion node(s): -05/03/2016 18:18:13: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 18:18:46: Training criterion node(s): +05/03/2016 18:18:46: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 18:18:13: Evaluation criterion node(s): +05/03/2016 18:18:46: Evaluation criterion node(s): -05/03/2016 18:18:13: EvalErrorPrediction = ErrorPrediction +05/03/2016 18:18:46: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -419,112 +416,117 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -0x200a0e8: {[B0 Value[512 x 1]] } -0x20cbc58: {[features Value[363 x *]] } -0x2c98b88: {[MeanOfFeatures Value[363]] } -0x2c99058: {[InvStdOfFeatures Value[363]] } -0x2c99d58: {[W0 Value[512 x 363]] } -0x2e613a8: {[W1 Value[512 x 512]] } -0x2e62178: {[B1 Value[512 x 1]] } -0x2e63318: {[W2 Value[132 x 512]] } -0x2e63a98: {[B2 Value[132 x 1]] } -0x2e64df8: {[labels Value[132 x *]] } -0x2e66058: {[Prior Value[132]] } -0x2e6b8f8: {[EvalErrorPrediction Value[1]] } -0x2e6bbf8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -0x2e6bdb8: {[CrossEntropyWithSoftmax Value[1]] } -0x2e6c248: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -0x2e6c3b8: {[LogOfPrior Value[132]] } -0x314d088: {[MVNormalizedFeatures Value[363 x *]] } -0x314d848: {[W0*features Value[512 x *]] } -0x314da58: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -0x314dbb8: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -0x314dd18: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -0x314ded8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -0x314e098: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -0x314e258: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -0x314edb8: {[CrossEntropyWithSoftmax Gradient[1]] } -0x314ef78: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } 
-0x314f138: {[W2*H1 Gradient[132 x 1 x *]] } -0x314f2f8: {[B2 Gradient[132 x 1]] } +0x243a288: {[features Value[363 x *]] } +0x31ae688: {[InvStdOfFeatures Value[363]] } +0x31af3b8: {[W0 Value[512 x 363]] } +0x31b31c8: {[MeanOfFeatures Value[363]] } +0x32d6f38: {[W1 Value[512 x 512]] } +0x32d7d08: {[B1 Value[512 x 1]] } +0x32d8ea8: {[W2 Value[132 x 512]] } +0x32d9628: {[B2 Value[132 x 1]] } +0x32da988: {[labels Value[132 x *]] } +0x32dbbe8: {[Prior Value[132]] } +0x32e1488: {[EvalErrorPrediction Value[1]] } +0x32e1788: {[ScaledLogLikelihood Value[132 x 1 x *]] } +0x32e1948: {[CrossEntropyWithSoftmax Value[1]] } +0x32e1dd8: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +0x32e1f48: {[LogOfPrior Value[132]] } +0x32e7578: {[B0 Value[512 x 1]] } +0x35c2c18: {[MVNormalizedFeatures Value[363 x *]] } +0x35c33d8: {[W0*features Value[512 x *]] } +0x35c35e8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +0x35c3748: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +0x35c38a8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +0x35c3a68: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +0x35c3c28: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +0x35c3de8: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +0x35c4948: {[CrossEntropyWithSoftmax Gradient[1]] } +0x35c4b08: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +0x35c4cc8: {[W2*H1 Gradient[132 x 1 x *]] } +0x35c4e88: {[B2 Gradient[132 x 1]] } -05/03/2016 18:18:13: Precomputing --> 3 PreCompute nodes found. +05/03/2016 18:18:46: Precomputing --> 3 PreCompute nodes found. -05/03/2016 18:18:13: MeanOfFeatures = Mean() -05/03/2016 18:18:13: InvStdOfFeatures = InvStdDev() -05/03/2016 18:18:13: Prior = Mean() +05/03/2016 18:18:46: MeanOfFeatures = Mean() +05/03/2016 18:18:46: InvStdOfFeatures = InvStdDev() +05/03/2016 18:18:46: Prior = Mean() +minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:18:16: Precomputing --> Completed. +05/03/2016 18:18:47: Precomputing --> Completed. -05/03/2016 18:18:16: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +05/03/2016 18:18:47: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:16: Starting minibatch loop. 
-05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.32135277 * 640; EvalErrorPrediction = 0.90000000 * 640; time = 0.0964s; samplesPerSecond = 6639.9 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15070992 * 640; EvalErrorPrediction = 0.86718750 * 640; time = 0.0082s; samplesPerSecond = 78029.7 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99901123 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0083s; samplesPerSecond = 77360.1 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86945953 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0081s; samplesPerSecond = 78653.1 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80219574 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0082s; samplesPerSecond = 77754.8 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72890930 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.0082s; samplesPerSecond = 78039.3 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.56186981 * 640; EvalErrorPrediction = 0.82343750 * 640; time = 0.0082s; samplesPerSecond = 78220.5 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42790527 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0082s; samplesPerSecond = 78306.6 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.33928528 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.0080s; samplesPerSecond = 80493.0 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36398926 * 640; EvalErrorPrediction = 0.84375000 * 640; time = 0.0079s; samplesPerSecond = 80533.5 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21223450 * 640; EvalErrorPrediction = 0.75312500 * 640; time = 0.0079s; samplesPerSecond = 80808.1 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31265564 * 640; EvalErrorPrediction = 0.78750000 * 640; time = 0.0080s; samplesPerSecond = 80371.7 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14082031 * 640; EvalErrorPrediction = 0.74687500 * 640; time = 0.0079s; samplesPerSecond = 80971.7 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.00689697 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.0079s; samplesPerSecond = 80787.7 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.00495911 * 640; EvalErrorPrediction = 0.72343750 * 640; time = 0.0079s; samplesPerSecond = 80604.5 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.97858887 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0080s; samplesPerSecond = 80361.6 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.85686035 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.0079s; samplesPerSecond = 80797.9 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.69053345 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.0081s; samplesPerSecond = 78808.0 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: 
CrossEntropyWithSoftmax = 2.78653564 * 640; EvalErrorPrediction = 0.70468750 * 640; time = 0.0082s; samplesPerSecond = 78431.4 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.57702026 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0081s; samplesPerSecond = 78856.6 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.61571655 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0081s; samplesPerSecond = 78595.1 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.55236206 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0082s; samplesPerSecond = 78489.1 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48211670 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.0082s; samplesPerSecond = 78469.8 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.38778687 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0082s; samplesPerSecond = 77868.4 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.36900635 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0081s; samplesPerSecond = 78914.9 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.43967285 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0081s; samplesPerSecond = 78643.4 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.30281982 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.0081s; samplesPerSecond = 78643.4 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.19668579 * 640; EvalErrorPrediction = 0.55937500 * 640; time = 0.0082s; samplesPerSecond = 78010.7 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.28980103 * 640; EvalErrorPrediction = 0.60468750 * 640; time = 0.0082s; samplesPerSecond = 78383.3 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.17750854 * 640; EvalErrorPrediction = 0.62187500 * 640; time = 0.0081s; samplesPerSecond = 78633.7 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.26263428 * 640; EvalErrorPrediction = 0.59687500 * 640; time = 0.0082s; samplesPerSecond = 78316.2 -05/03/2016 18:18:16: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.15072632 * 640; EvalErrorPrediction = 0.56250000 * 640; time = 0.0082s; samplesPerSecond = 78469.8 -05/03/2016 18:18:16: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.97995720 * 20480; EvalErrorPrediction = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=0.38744s -05/03/2016 18:18:16: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn.1' +05/03/2016 18:18:47: Starting minibatch loop. 
+05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.32135277 * 640; EvalErrorPrediction = 0.90000000 * 640; time = 0.0101s; samplesPerSecond = 63479.5 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15070992 * 640; EvalErrorPrediction = 0.86718750 * 640; time = 0.0075s; samplesPerSecond = 85435.9 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99901123 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0074s; samplesPerSecond = 85929.1 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86945953 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0075s; samplesPerSecond = 85871.5 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80219574 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0074s; samplesPerSecond = 85975.3 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72890930 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.0075s; samplesPerSecond = 85836.9 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.56186981 * 640; EvalErrorPrediction = 0.82343750 * 640; time = 0.0075s; samplesPerSecond = 85802.4 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42790527 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0075s; samplesPerSecond = 85825.4 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.33928528 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.0074s; samplesPerSecond = 86021.5 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36398926 * 640; EvalErrorPrediction = 0.84375000 * 640; time = 0.0074s; samplesPerSecond = 85929.1 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21223450 * 640; EvalErrorPrediction = 0.75312500 * 640; time = 0.0075s; samplesPerSecond = 85767.9 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31265564 * 640; EvalErrorPrediction = 0.78750000 * 640; time = 0.0075s; samplesPerSecond = 85721.9 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14082031 * 640; EvalErrorPrediction = 0.74687500 * 640; time = 0.0074s; samplesPerSecond = 86183.7 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.00689697 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.0075s; samplesPerSecond = 85710.5 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.00495911 * 640; EvalErrorPrediction = 0.72343750 * 640; time = 0.0074s; samplesPerSecond = 85952.2 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.97858887 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0075s; samplesPerSecond = 85894.5 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.85686035 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.0074s; samplesPerSecond = 86241.7 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.69053345 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.0074s; samplesPerSecond = 86021.5 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: 
CrossEntropyWithSoftmax = 2.78653564 * 640; EvalErrorPrediction = 0.70468750 * 640; time = 0.0075s; samplesPerSecond = 85906.0 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.57702026 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0075s; samplesPerSecond = 85790.9 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.61571655 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0074s; samplesPerSecond = 86114.1 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.55236206 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0075s; samplesPerSecond = 85676.0 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48211670 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.0074s; samplesPerSecond = 86241.7 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.38778687 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0075s; samplesPerSecond = 85906.0 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.36900635 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0074s; samplesPerSecond = 86033.1 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.43967285 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0075s; samplesPerSecond = 85767.9 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.30281982 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.0075s; samplesPerSecond = 85435.9 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.19668579 * 640; EvalErrorPrediction = 0.55937500 * 640; time = 0.0075s; samplesPerSecond = 85424.5 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.28980103 * 640; EvalErrorPrediction = 0.60468750 * 640; time = 0.0074s; samplesPerSecond = 85986.8 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.17750854 * 640; EvalErrorPrediction = 0.62187500 * 640; time = 0.0075s; samplesPerSecond = 85710.5 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.26263428 * 640; EvalErrorPrediction = 0.59687500 * 640; time = 0.0074s; samplesPerSecond = 86206.9 +05/03/2016 18:18:47: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.15072632 * 640; EvalErrorPrediction = 0.56250000 * 640; time = 0.0075s; samplesPerSecond = 84891.9 +05/03/2016 18:18:47: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.97995720 * 20480; EvalErrorPrediction = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=0.244292s +05/03/2016 18:18:47: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn.1' -05/03/2016 18:18:16: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +05/03/2016 18:18:47: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:16: Starting minibatch loop. 
-05/03/2016 18:18:16: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.01598530 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0173s; samplesPerSecond = 147627.0 -05/03/2016 18:18:16: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98818569 * 2560; EvalErrorPrediction = 0.54296875 * 2560; time = 0.0146s; samplesPerSecond = 175715.6 -05/03/2016 18:18:16: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98698120 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0147s; samplesPerSecond = 173653.5 -05/03/2016 18:18:16: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93126144 * 2560; EvalErrorPrediction = 0.52773437 * 2560; time = 0.0148s; samplesPerSecond = 172786.2 -05/03/2016 18:18:16: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90067825 * 2560; EvalErrorPrediction = 0.52656250 * 2560; time = 0.0149s; samplesPerSecond = 172309.3 -05/03/2016 18:18:16: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97115860 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0148s; samplesPerSecond = 172821.2 -05/03/2016 18:18:16: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89518127 * 2560; EvalErrorPrediction = 0.52031250 * 2560; time = 0.0144s; samplesPerSecond = 177691.4 -05/03/2016 18:18:16: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.90450439 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0144s; samplesPerSecond = 177273.0 -05/03/2016 18:18:16: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.94924202 * 20480; EvalErrorPrediction = 0.53417969 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.121744s -05/03/2016 18:18:16: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn.2' +05/03/2016 18:18:47: Starting minibatch loop. 
+05/03/2016 18:18:47: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.01598530 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0144s; samplesPerSecond = 177174.9 +05/03/2016 18:18:47: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98818569 * 2560; EvalErrorPrediction = 0.54296875 * 2560; time = 0.0121s; samplesPerSecond = 211692.7 +05/03/2016 18:18:47: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98698120 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0121s; samplesPerSecond = 212342.4 +05/03/2016 18:18:47: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93126144 * 2560; EvalErrorPrediction = 0.52773437 * 2560; time = 0.0120s; samplesPerSecond = 212483.4 +05/03/2016 18:18:47: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90067825 * 2560; EvalErrorPrediction = 0.52656250 * 2560; time = 0.0121s; samplesPerSecond = 212254.4 +05/03/2016 18:18:47: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97115860 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0122s; samplesPerSecond = 209973.8 +05/03/2016 18:18:47: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89518127 * 2560; EvalErrorPrediction = 0.52031250 * 2560; time = 0.0121s; samplesPerSecond = 212166.4 +05/03/2016 18:18:47: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.90450439 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0120s; samplesPerSecond = 212642.2 +05/03/2016 18:18:47: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.94924202 * 20480; EvalErrorPrediction = 0.53417969 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.101205s +05/03/2016 18:18:47: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn.2' -05/03/2016 18:18:16: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 18:18:47: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:16: Starting minibatch loop. -05/03/2016 18:18:16: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359848 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.0472s; samplesPerSecond = 216976.7 -05/03/2016 18:18:16: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656265 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.0409s; samplesPerSecond = 250464.7 -05/03/2016 18:18:16: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008057 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.090429s -05/03/2016 18:18:16: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn' -05/03/2016 18:18:16: CNTKCommandTrainEnd: speechTrain +05/03/2016 18:18:47: Starting minibatch loop. 
+05/03/2016 18:18:47: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359848 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.0355s; samplesPerSecond = 288320.8 +05/03/2016 18:18:47: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656265 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.0300s; samplesPerSecond = 341663.6 +05/03/2016 18:18:47: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008057 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.070216s +05/03/2016 18:18:47: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn' +05/03/2016 18:18:47: CNTKCommandTrainEnd: speechTrain -05/03/2016 18:18:16: Action "train" complete. +05/03/2016 18:18:47: Action "train" complete. -05/03/2016 18:18:16: __COMPLETED__ +05/03/2016 18:18:47: __COMPLETED__ === Deleting last epoch data ==== Re-running from checkpoint -=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true ------------------------------------------------------------------- Build info: @@ -543,32 +545,32 @@ Build info: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux ------------------------------------------------------------------- Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -05/03/2016 18:18:16: ------------------------------------------------------------------- -05/03/2016 18:18:16: Build info: +05/03/2016 18:18:47: ------------------------------------------------------------------- +05/03/2016 18:18:47: Build info: -05/03/2016 18:18:16: Built time: May 3 2016 17:56:15 -05/03/2016 18:18:16: Last modified date: Tue May 3 11:36:22 2016 -05/03/2016 18:18:16: Build type: release -05/03/2016 18:18:16: Build target: GPU -05/03/2016 18:18:16: With 1bit-SGD: no -05/03/2016 18:18:16: Math lib: acml -05/03/2016 18:18:16: CUDA_PATH: /usr/local/cuda-7.5 -05/03/2016 18:18:16: CUB_PATH: 
/usr/local/cub-1.4.1 -05/03/2016 18:18:16: CUDNN_PATH: /usr/local/cudnn-4.0 -05/03/2016 18:18:16: Build Branch: HEAD -05/03/2016 18:18:16: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -05/03/2016 18:18:16: Built by philly on 18750d26eb32 -05/03/2016 18:18:16: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -05/03/2016 18:18:16: ------------------------------------------------------------------- +05/03/2016 18:18:47: Built time: May 3 2016 17:56:15 +05/03/2016 18:18:47: Last modified date: Tue May 3 11:36:22 2016 +05/03/2016 18:18:47: Build type: release +05/03/2016 18:18:47: Build target: GPU +05/03/2016 18:18:47: With 1bit-SGD: no +05/03/2016 18:18:47: Math lib: acml +05/03/2016 18:18:47: CUDA_PATH: /usr/local/cuda-7.5 +05/03/2016 18:18:47: CUB_PATH: /usr/local/cub-1.4.1 +05/03/2016 18:18:47: CUDNN_PATH: /usr/local/cudnn-4.0 +05/03/2016 18:18:47: Build Branch: HEAD +05/03/2016 18:18:47: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +05/03/2016 18:18:47: Built by philly on 18750d26eb32 +05/03/2016 18:18:47: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +05/03/2016 18:18:47: ------------------------------------------------------------------- -05/03/2016 18:18:16: Running on localhost at 2016/05/03 18:18:16 -05/03/2016 18:18:16: Command line: -/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +05/03/2016 18:18:47: Running on localhost at 2016/05/03 18:18:47 +05/03/2016 18:18:47: Command line: +/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true -05/03/2016 18:18:16: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:16: precision = "float" +05/03/2016 18:18:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:47: precision = "float" command = speechTrain deviceId = $DeviceId$ parallelTrain = false @@ -654,26 +656,25 @@ speechTrain = [ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu 
+RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 18:18:16: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:16: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:16: precision = "float" +05/03/2016 18:18:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:47: precision = "float" command = speechTrain deviceId = 0 parallelTrain = false makeMode = false speechTrain = [ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn" deviceId = 0 traceLevel = 1 SimpleNetworkBuilder = [ @@ -752,31 +753,30 @@ speechTrain = [ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 18:18:16: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:16: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain -configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/../../QuickE2E +configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/QuickE2E configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:deviceId=0 configparameters: 
cntk.cntk:makeMode=true -configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu configparameters: cntk.cntk:parallelTrain=false configparameters: cntk.cntk:precision=float -configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu +configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu configparameters: cntk.cntk:speechTrain=[ action = "train" - modelPath = "/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn" + modelPath = "/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn" deviceId = 0 traceLevel = 1 SimpleNetworkBuilder = [ @@ -853,33 +853,32 @@ configparameters: cntk.cntk:speechTrain=[ labelType = "category" ] ] -] [reader=[readerType=ExperimentalHTKMLFReader]] +] configparameters: cntk.cntk:timestamping=true -05/03/2016 18:18:16: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:16: Commands: speechTrain -05/03/2016 18:18:16: Precision = "float" -05/03/2016 18:18:16: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn -05/03/2016 18:18:16: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 18:18:16: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +05/03/2016 18:18:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:47: Commands: speechTrain +05/03/2016 18:18:47: Precision = "float" +05/03/2016 18:18:47: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn +05/03/2016 18:18:47: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 18:18:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -05/03/2016 18:18:16: ############################################################################## -05/03/2016 18:18:16: # # -05/03/2016 18:18:16: # Action "train" # -05/03/2016 18:18:16: # # -05/03/2016 18:18:16: ############################################################################## +05/03/2016 18:18:47: ############################################################################## +05/03/2016 18:18:47: # # +05/03/2016 18:18:47: # Action "train" # +05/03/2016 18:18:47: # # +05/03/2016 18:18:47: ############################################################################## -05/03/2016 18:18:16: CNTKCommandTrainBegin: speechTrain +05/03/2016 18:18:47: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using GPU 0 -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... 
total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:18:16: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn.2'. +05/03/2016 18:18:47: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn.2'. Post-processing network... @@ -931,14 +930,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:18:17: Loaded model with 25 nodes on GPU 0. +05/03/2016 18:18:48: Loaded model with 25 nodes on GPU 0. -05/03/2016 18:18:17: Training criterion node(s): -05/03/2016 18:18:17: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 18:18:48: Training criterion node(s): +05/03/2016 18:18:48: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 18:18:17: Evaluation criterion node(s): +05/03/2016 18:18:48: Evaluation criterion node(s): -05/03/2016 18:18:17: EvalErrorPrediction = ErrorPrediction +05/03/2016 18:18:48: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -946,46 +945,48 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *1]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *1]] [PosteriorProb Value[132 x 1 x *1]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *1]] [features Gradient[363 x *1]] [labels Gradient[132 x *1]] } -0x2621718: {[B2 Value[132 x 1]] } -0x2621a28: {[B1 Value[512 x 1]] } -0x26e3d98: {[B0 Value[512 x 1]] } -0x2eb2338: {[InvStdOfFeatures Value[363]] } -0x2eb2cb8: {[labels Value[132 x *1]] } -0x2eb3f78: {[MeanOfFeatures Value[363]] } -0x2eb4568: {[Prior Value[132]] } -0x2eb5438: {[W0 Value[512 x 363]] } -0x2ebe9e8: {[features Value[363 x *1]] } -0x33b8d78: {[W1 Value[512 x 512]] } -0x33b9de8: {[W2 Value[132 x 512]] } -0x33bf418: {[EvalErrorPrediction Value[1]] } -0x33bf5d8: {[ScaledLogLikelihood Value[132 x 1 x *1]] } -0x33bf798: {[CrossEntropyWithSoftmax Value[1]] } -0x33bfc28: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *1]] } -0x33bfd58: {[LogOfPrior Value[132]] } -0x33c1428: {[MVNormalizedFeatures Value[363 x *1]] } -0x33c1be8: {[W0*features Value[512 x *1]] } -0x33c1df8: {[H1 Value[512 x 1 x *1]] [W0*features Gradient[512 x *1]] } -0x33c1f58: {[W0*features+B0 Gradient[512 x 1 x *1]] [W1*H1 Value[512 x 1 x *1]] } -0x33c2118: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *1]] } -0x33c22d8: {[H2 Value[512 x 1 x *1]] [W1*H1 Gradient[512 x 1 x *1]] } -0x33c2498: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *1]] [W1*H1+B1 Gradient[512 x 1 x *1]] [W2*H1 Value[132 x 1 x *1]] } -0x33c2658: {[HLast Value[132 x 1 x *1]] [W2 Gradient[132 x 512]] } -0x33c31b8: {[CrossEntropyWithSoftmax Gradient[1]] } -0x33c3378: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *1]] [HLast Gradient[132 x 1 x *1]] } -0x33c3538: {[W2*H1 Gradient[132 x 1 x *1]] } -0x33c36f8: {[B2 Gradient[132 x 1]] } 
+0x1866068: {[B0 Value[512 x 1]] } +0x18f52c8: {[B2 Value[132 x 1]] } +0x18f55d8: {[B1 Value[512 x 1]] } +0x2547d18: {[InvStdOfFeatures Value[363]] } +0x2548698: {[labels Value[132 x *1]] } +0x2549958: {[MeanOfFeatures Value[363]] } +0x2549f48: {[Prior Value[132]] } +0x254ae18: {[W0 Value[512 x 363]] } +0x2554e78: {[features Value[363 x *1]] } +0x26208d8: {[W1 Value[512 x 512]] } +0x2621948: {[W2 Value[132 x 512]] } +0x26267b8: {[EvalErrorPrediction Value[1]] } +0x2626e48: {[ScaledLogLikelihood Value[132 x 1 x *1]] } +0x2627008: {[CrossEntropyWithSoftmax Value[1]] } +0x2627578: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *1]] } +0x2627768: {[LogOfPrior Value[132]] } +0x2629278: {[MVNormalizedFeatures Value[363 x *1]] } +0x2629978: {[W0*features Value[512 x *1]] } +0x2629b88: {[H1 Value[512 x 1 x *1]] [W0*features Gradient[512 x *1]] } +0x2629ce8: {[W0*features+B0 Gradient[512 x 1 x *1]] [W1*H1 Value[512 x 1 x *1]] } +0x2629e48: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *1]] } +0x262a008: {[H2 Value[512 x 1 x *1]] [W1*H1 Gradient[512 x 1 x *1]] } +0x262a1c8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *1]] [W1*H1+B1 Gradient[512 x 1 x *1]] [W2*H1 Value[132 x 1 x *1]] } +0x262a388: {[HLast Value[132 x 1 x *1]] [W2 Gradient[132 x 512]] } +0x262aee8: {[CrossEntropyWithSoftmax Gradient[1]] } +0x262b0a8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *1]] [HLast Gradient[132 x 1 x *1]] } +0x262b268: {[W2*H1 Gradient[132 x 1 x *1]] } +0x262b428: {[B2 Gradient[132 x 1]] } -05/03/2016 18:18:17: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 18:18:48: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 18:18:17: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 18:18:48: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:18:17: Starting minibatch loop. -05/03/2016 18:18:17: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359848 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.2291s; samplesPerSecond = 44695.5 -05/03/2016 18:18:17: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656265 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.0417s; samplesPerSecond = 245293.0 -05/03/2016 18:18:17: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008057 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.313722s -05/03/2016 18:18:17: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn' -05/03/2016 18:18:17: CNTKCommandTrainEnd: speechTrain +05/03/2016 18:18:48: Starting minibatch loop. 
+05/03/2016 18:18:48: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359848 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.1368s; samplesPerSecond = 74864.7 +05/03/2016 18:18:48: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656265 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.0298s; samplesPerSecond = 343578.0 +05/03/2016 18:18:48: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008057 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.255943s +05/03/2016 18:18:48: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn' +05/03/2016 18:18:48: CNTKCommandTrainEnd: speechTrain -05/03/2016 18:18:17: Action "train" complete. +05/03/2016 18:18:48: Action "train" complete. -05/03/2016 18:18:17: __COMPLETED__ \ No newline at end of file +05/03/2016 18:18:48: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.windows.cpu.txt index 6862daaa2..f45706f07 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.windows.cpu.txt @@ -1,4 +1,4 @@ -=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true ------------------------------------------------------------------- Build info: @@ -16,31 +16,31 @@ Build info: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ ------------------------------------------------------------------- Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -05/03/2016 14:27:28: ------------------------------------------------------------------- -05/03/2016 14:27:28: Build info: +05/03/2016 14:28:05: ------------------------------------------------------------------- +05/03/2016 14:28:05: Build info: -05/03/2016 
14:27:28: Built time: May 3 2016 13:23:06 -05/03/2016 14:27:28: Last modified date: Mon Apr 18 00:00:12 2016 -05/03/2016 14:27:28: Build type: Release -05/03/2016 14:27:28: Build target: GPU -05/03/2016 14:27:28: With 1bit-SGD: no -05/03/2016 14:27:28: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -05/03/2016 14:27:28: CUB_PATH: C:\src\cub-1.4.1 -05/03/2016 14:27:28: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -05/03/2016 14:27:28: Build Branch: HEAD -05/03/2016 14:27:28: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -05/03/2016 14:27:28: Built by svcphil on LIANA-09-w -05/03/2016 14:27:28: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -05/03/2016 14:27:28: ------------------------------------------------------------------- +05/03/2016 14:28:05: Built time: May 3 2016 13:23:06 +05/03/2016 14:28:05: Last modified date: Mon Apr 18 00:00:12 2016 +05/03/2016 14:28:05: Build type: Release +05/03/2016 14:28:05: Build target: GPU +05/03/2016 14:28:05: With 1bit-SGD: no +05/03/2016 14:28:05: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +05/03/2016 14:28:05: CUB_PATH: C:\src\cub-1.4.1 +05/03/2016 14:28:05: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +05/03/2016 14:28:05: Build Branch: HEAD +05/03/2016 14:28:05: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +05/03/2016 14:28:05: Built by svcphil on LIANA-09-w +05/03/2016 14:28:05: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +05/03/2016 14:28:05: ------------------------------------------------------------------- -05/03/2016 14:27:28: Running on cntk-muc02 at 2016/05/03 14:27:28 -05/03/2016 14:27:28: Command line: -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu DeviceId=-1 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +05/03/2016 14:28:05: Running on cntk-muc02 at 2016/05/03 14:28:05 +05/03/2016 14:28:05: Command line: +C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true -05/03/2016 14:27:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:28: precision = "float" +05/03/2016 14:28:05: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:05: precision = "float" command = speechTrain deviceId = $DeviceId$ parallelTrain = false @@ 
-126,25 +126,24 @@ speechTrain = [ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 14:27:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:05: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:28: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:28: precision = "float" +05/03/2016 14:28:05: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:05: precision = "float" command = speechTrain deviceId = -1 parallelTrain = false makeMode = false speechTrain = [ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn" deviceId = -1 traceLevel = 1 SimpleNetworkBuilder = [ @@ -223,30 +222,29 @@ speechTrain = [ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 14:27:28: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:05: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:28: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:05: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data configparameters: cntk.cntk:deviceId=-1 configparameters: 
cntk.cntk:makeMode=false -configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu configparameters: cntk.cntk:parallelTrain=false configparameters: cntk.cntk:precision=float -configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu configparameters: cntk.cntk:speechTrain=[ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn" deviceId = -1 traceLevel = 1 SimpleNetworkBuilder = [ @@ -323,33 +321,32 @@ configparameters: cntk.cntk:speechTrain=[ labelType = "category" ] ] -] [reader=[readerType=ExperimentalHTKMLFReader]] +] configparameters: cntk.cntk:timestamping=true -05/03/2016 14:27:28: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:28: Commands: speechTrain -05/03/2016 14:27:28: Precision = "float" -05/03/2016 14:27:28: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn -05/03/2016 14:27:28: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 14:27:28: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +05/03/2016 14:28:05: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:05: Commands: speechTrain +05/03/2016 14:28:05: Precision = "float" +05/03/2016 14:28:05: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn +05/03/2016 14:28:05: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 14:28:05: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -05/03/2016 14:27:28: ############################################################################## -05/03/2016 14:27:28: # # -05/03/2016 14:27:28: # Action "train" # -05/03/2016 14:27:28: # # -05/03/2016 14:27:28: ############################################################################## +05/03/2016 14:28:05: ############################################################################## +05/03/2016 14:28:05: # # +05/03/2016 14:28:05: # Action "train" # +05/03/2016 14:28:05: # # +05/03/2016 14:28:05: ############################################################################## -05/03/2016 14:27:28: CNTKCommandTrainBegin: speechTrain +05/03/2016 14:28:05: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using CPU -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 
948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:27:28: Creating virgin network. +05/03/2016 14:28:06: Creating virgin network. Post-processing network... @@ -401,14 +398,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:27:28: Created model with 25 nodes on CPU. +05/03/2016 14:28:06: Created model with 25 nodes on CPU. -05/03/2016 14:27:28: Training criterion node(s): -05/03/2016 14:27:28: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 14:28:06: Training criterion node(s): +05/03/2016 14:28:06: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 14:27:28: Evaluation criterion node(s): +05/03/2016 14:28:06: Evaluation criterion node(s): -05/03/2016 14:27:28: EvalErrorPrediction = ErrorPrediction +05/03/2016 14:28:06: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -416,112 +413,117 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -000000B4BD59D0F0: {[features Value[363 x *]] } -000000B4BD59D230: {[InvStdOfFeatures Value[363]] } -000000B4BD59D4B0: {[W0 Value[512 x 363]] } -000000B4BD59D730: {[MeanOfFeatures Value[363]] } -000000B4BD59D870: {[B0 Value[512 x 1]] } -000000B4BD59DAF0: {[W1 Value[512 x 512]] } -000000B4BD59DB90: {[B1 Value[512 x 1]] } -000000B4BF441AF0: {[LogOfPrior Value[132]] } -000000B4BF441C30: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -000000B4BF441D70: {[MVNormalizedFeatures Value[363 x *]] } -000000B4BF441E10: {[CrossEntropyWithSoftmax Gradient[1]] } -000000B4BF441FF0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -000000B4BF442090: {[W2*H1 Gradient[132 x 1 x *]] } -000000B4BF442130: {[B2 Value[132 x 1]] } -000000B4BF442270: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -000000B4BF442310: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -000000B4BF4423B0: {[Prior Value[132]] } -000000B4BF442770: {[EvalErrorPrediction Value[1]] } -000000B4BF442810: {[W0*features Value[512 x *]] } -000000B4BF442950: {[ScaledLogLikelihood Value[132 x 1 x *]] } -000000B4BF442B30: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -000000B4BF442BD0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -000000B4BF442C70: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -000000B4BF442E50: {[labels Value[132 x *]] } -000000B4BF442F90: {[W2 Value[132 x 512]] 
} -000000B4BF4432B0: {[CrossEntropyWithSoftmax Value[1]] } -000000B4BF443710: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -000000B4BF443850: {[B2 Gradient[132 x 1]] } +0000003DFE0F9900: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +0000003DFE0F99A0: {[CrossEntropyWithSoftmax Gradient[1]] } +0000003DFE0F9AE0: {[labels Value[132 x *]] } +0000003DFE0F9B80: {[MVNormalizedFeatures Value[363 x *]] } +0000003DFE0F9C20: {[B2 Value[132 x 1]] } +0000003DFE0F9D60: {[EvalErrorPrediction Value[1]] } +0000003DFE0F9E00: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +0000003DFE0FA080: {[W0*features Value[512 x *]] } +0000003DFE0FA120: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +0000003DFE0FA1C0: {[LogOfPrior Value[132]] } +0000003DFE0FA260: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +0000003DFE0FA3A0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +0000003DFE0FA440: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +0000003DFE0FA4E0: {[ScaledLogLikelihood Value[132 x 1 x *]] } +0000003DFE0FA760: {[CrossEntropyWithSoftmax Value[1]] } +0000003DFE0FA9E0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +0000003DFE0FAB20: {[W2*H1 Gradient[132 x 1 x *]] } +0000003DFE0FABC0: {[B2 Gradient[132 x 1]] } +0000003DFE0FB340: {[W2 Value[132 x 512]] } +0000003DFE0FB520: {[Prior Value[132]] } +0000003DFE0FB7A0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +0000003DFFD05880: {[MeanOfFeatures Value[363]] } +0000003DFFD059C0: {[B0 Value[512 x 1]] } +0000003DFFD05B00: {[features Value[363 x *]] } +0000003DFFD05E20: {[W1 Value[512 x 512]] } +0000003DFFD05EC0: {[InvStdOfFeatures Value[363]] } +0000003DFFD06000: {[B1 Value[512 x 1]] } +0000003DFFD065A0: {[W0 Value[512 x 363]] } -05/03/2016 14:27:28: Precomputing --> 3 PreCompute nodes found. +05/03/2016 14:28:06: Precomputing --> 3 PreCompute nodes found. -05/03/2016 14:27:28: MeanOfFeatures = Mean() -05/03/2016 14:27:28: InvStdOfFeatures = InvStdDev() -05/03/2016 14:27:28: Prior = Mean() +05/03/2016 14:28:06: MeanOfFeatures = Mean() +05/03/2016 14:28:06: InvStdOfFeatures = InvStdDev() +05/03/2016 14:28:06: Prior = Mean() +minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:27:29: Precomputing --> Completed. +05/03/2016 14:28:07: Precomputing --> Completed. -05/03/2016 14:27:29: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +05/03/2016 14:28:07: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -05/03/2016 14:27:29: Starting minibatch loop. 
-05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944885 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.1949s; samplesPerSecond = 3284.1 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22300034 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0570s; samplesPerSecond = 11233.0 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971329 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0571s; samplesPerSecond = 11199.4 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341614 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.0570s; samplesPerSecond = 11220.6 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074249 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0569s; samplesPerSecond = 11251.4 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71251984 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.0569s; samplesPerSecond = 11246.0 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563110 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.0568s; samplesPerSecond = 11262.6 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49348450 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.0568s; samplesPerSecond = 11262.1 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34739685 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.0566s; samplesPerSecond = 11309.6 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51961060 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0561s; samplesPerSecond = 11413.3 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656067 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.0559s; samplesPerSecond = 11455.8 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397217 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0559s; samplesPerSecond = 11449.4 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780762 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.0562s; samplesPerSecond = 11397.2 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845886 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0560s; samplesPerSecond = 11433.5 -05/03/2016 14:27:30: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06457214 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0560s; samplesPerSecond = 11431.6 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91632080 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0564s; samplesPerSecond = 11342.3 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90608521 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.0566s; samplesPerSecond = 11312.6 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74095459 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.0561s; samplesPerSecond = 11413.3 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: 
CrossEntropyWithSoftmax = 2.67088013 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.0559s; samplesPerSecond = 11452.1 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67608643 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0561s; samplesPerSecond = 11410.8 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54733276 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0559s; samplesPerSecond = 11453.9 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925659 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.0558s; samplesPerSecond = 11460.9 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52387695 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0560s; samplesPerSecond = 11434.1 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47543945 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.0560s; samplesPerSecond = 11422.0 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265381 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.0560s; samplesPerSecond = 11436.3 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728516 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.0559s; samplesPerSecond = 11440.4 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674561 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.0562s; samplesPerSecond = 11386.7 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020508 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.0560s; samplesPerSecond = 11429.8 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400757 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0561s; samplesPerSecond = 11414.5 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885010 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.0561s; samplesPerSecond = 11400.5 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22711792 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.0562s; samplesPerSecond = 11395.0 -05/03/2016 14:27:31: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604858 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.0562s; samplesPerSecond = 11388.1 -05/03/2016 14:27:31: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704632 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.0163s -05/03/2016 14:27:32: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn.1' +05/03/2016 14:28:07: Starting minibatch loop. 
+05/03/2016 14:28:07: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944885 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.0650s; samplesPerSecond = 9852.2 +05/03/2016 14:28:07: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22300034 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0570s; samplesPerSecond = 11228.3 +05/03/2016 14:28:07: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971329 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0569s; samplesPerSecond = 11243.9 +05/03/2016 14:28:07: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341614 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.0579s; samplesPerSecond = 11051.1 +05/03/2016 14:28:07: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074249 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0570s; samplesPerSecond = 11230.8 +05/03/2016 14:28:07: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71251984 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.0569s; samplesPerSecond = 11250.4 +05/03/2016 14:28:07: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563110 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.0571s; samplesPerSecond = 11205.1 +05/03/2016 14:28:07: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49348450 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.0572s; samplesPerSecond = 11179.8 +05/03/2016 14:28:07: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34739685 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.0570s; samplesPerSecond = 11218.2 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51961060 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0572s; samplesPerSecond = 11193.5 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656067 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.0571s; samplesPerSecond = 11201.0 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397217 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0571s; samplesPerSecond = 11201.5 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780762 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.0570s; samplesPerSecond = 11227.1 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845886 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0570s; samplesPerSecond = 11233.2 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06457214 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0569s; samplesPerSecond = 11238.5 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91632080 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0570s; samplesPerSecond = 11227.1 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90608521 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.0576s; samplesPerSecond = 11113.6 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74095459 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.0576s; samplesPerSecond = 11115.5 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: 
CrossEntropyWithSoftmax = 2.67088013 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.0569s; samplesPerSecond = 11240.1 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67608643 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0570s; samplesPerSecond = 11218.8 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54733276 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0571s; samplesPerSecond = 11217.4 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925659 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.0570s; samplesPerSecond = 11220.6 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52387695 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0579s; samplesPerSecond = 11057.2 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47543945 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.0570s; samplesPerSecond = 11226.5 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265381 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.0568s; samplesPerSecond = 11274.2 +05/03/2016 14:28:08: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728516 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.0570s; samplesPerSecond = 11229.3 +05/03/2016 14:28:09: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674561 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.0569s; samplesPerSecond = 11251.4 +05/03/2016 14:28:09: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020508 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.0569s; samplesPerSecond = 11243.9 +05/03/2016 14:28:09: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400757 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0570s; samplesPerSecond = 11225.5 +05/03/2016 14:28:09: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885010 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.0572s; samplesPerSecond = 11185.5 +05/03/2016 14:28:09: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22711792 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.0571s; samplesPerSecond = 11199.4 +05/03/2016 14:28:09: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604858 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.0568s; samplesPerSecond = 11270.4 +05/03/2016 14:28:09: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704632 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=1.85033s +05/03/2016 14:28:09: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn.1' -05/03/2016 14:27:32: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +05/03/2016 14:28:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 14:27:32: Starting minibatch loop. 
-05/03/2016 14:27:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10257511 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.1671s; samplesPerSecond = 15321.8 -05/03/2016 14:27:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00548573 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.1647s; samplesPerSecond = 15545.9 -05/03/2016 14:27:32: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00766983 * 2560; EvalErrorPrediction = 0.54960937 * 2560; time = 0.1653s; samplesPerSecond = 15485.2 -05/03/2016 14:27:32: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.92049370 * 2560; EvalErrorPrediction = 0.53281250 * 2560; time = 0.1657s; samplesPerSecond = 15452.7 -05/03/2016 14:27:32: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90178452 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.1649s; samplesPerSecond = 15519.9 -05/03/2016 14:27:33: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91359482 * 2560; EvalErrorPrediction = 0.53984375 * 2560; time = 0.1650s; samplesPerSecond = 15511.6 -05/03/2016 14:27:33: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91765289 * 2560; EvalErrorPrediction = 0.53125000 * 2560; time = 0.1644s; samplesPerSecond = 15570.6 -05/03/2016 14:27:33: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.87682800 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.1637s; samplesPerSecond = 15639.4 -05/03/2016 14:27:33: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.95576057 * 20480; EvalErrorPrediction = 0.53979492 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.32665s -05/03/2016 14:27:33: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn.2' +05/03/2016 14:28:09: Starting minibatch loop. 
+05/03/2016 14:28:09: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10257511 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.1699s; samplesPerSecond = 15069.5 +05/03/2016 14:28:09: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00548573 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.1697s; samplesPerSecond = 15086.2 +05/03/2016 14:28:09: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00766983 * 2560; EvalErrorPrediction = 0.54960937 * 2560; time = 0.1699s; samplesPerSecond = 15066.4 +05/03/2016 14:28:10: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.92049370 * 2560; EvalErrorPrediction = 0.53281250 * 2560; time = 0.1922s; samplesPerSecond = 13317.5 +05/03/2016 14:28:10: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90178452 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.1687s; samplesPerSecond = 15174.2 +05/03/2016 14:28:10: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91359482 * 2560; EvalErrorPrediction = 0.53984375 * 2560; time = 0.1698s; samplesPerSecond = 15073.5 +05/03/2016 14:28:10: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91765289 * 2560; EvalErrorPrediction = 0.53125000 * 2560; time = 0.1697s; samplesPerSecond = 15086.5 +05/03/2016 14:28:10: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.87682800 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.1689s; samplesPerSecond = 15156.4 +05/03/2016 14:28:10: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.95576057 * 20480; EvalErrorPrediction = 0.53979492 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.38388s +05/03/2016 14:28:10: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn.2' -05/03/2016 14:27:33: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 14:28:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses -05/03/2016 14:27:33: Starting minibatch loop. -05/03/2016 14:27:34: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593941 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 0.5871s; samplesPerSecond = 17440.6 -05/03/2016 14:27:34: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384575 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 0.5768s; samplesPerSecond = 17752.4 -05/03/2016 14:27:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989258 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.16924s -05/03/2016 14:27:34: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn' -05/03/2016 14:27:34: CNTKCommandTrainEnd: speechTrain +05/03/2016 14:28:10: Starting minibatch loop. 
+05/03/2016 14:28:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593941 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 0.5866s; samplesPerSecond = 17455.7 +05/03/2016 14:28:12: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384575 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 0.5830s; samplesPerSecond = 17564.4 +05/03/2016 14:28:12: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989258 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.17506s +05/03/2016 14:28:12: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn' +05/03/2016 14:28:12: CNTKCommandTrainEnd: speechTrain -05/03/2016 14:27:34: Action "train" complete. +05/03/2016 14:28:12: Action "train" complete. -05/03/2016 14:27:34: __COMPLETED__ +05/03/2016 14:28:12: __COMPLETED__ === Deleting last epoch data ==== Re-running from checkpoint -=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true ------------------------------------------------------------------- Build info: @@ -539,31 +541,31 @@ Build info: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ ------------------------------------------------------------------- Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -05/03/2016 14:27:35: ------------------------------------------------------------------- -05/03/2016 14:27:35: Build info: +05/03/2016 14:28:12: ------------------------------------------------------------------- +05/03/2016 14:28:12: Build info: -05/03/2016 14:27:35: Built time: May 3 2016 13:23:06 -05/03/2016 14:27:35: Last modified date: Mon Apr 18 00:00:12 2016 -05/03/2016 14:27:35: Build type: Release -05/03/2016 14:27:35: Build target: GPU -05/03/2016 14:27:35: With 1bit-SGD: no -05/03/2016 14:27:35: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -05/03/2016 14:27:35: CUB_PATH: 
C:\src\cub-1.4.1 -05/03/2016 14:27:35: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -05/03/2016 14:27:35: Build Branch: HEAD -05/03/2016 14:27:35: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -05/03/2016 14:27:35: Built by svcphil on LIANA-09-w -05/03/2016 14:27:35: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -05/03/2016 14:27:35: ------------------------------------------------------------------- +05/03/2016 14:28:12: Built time: May 3 2016 13:23:06 +05/03/2016 14:28:12: Last modified date: Mon Apr 18 00:00:12 2016 +05/03/2016 14:28:12: Build type: Release +05/03/2016 14:28:12: Build target: GPU +05/03/2016 14:28:12: With 1bit-SGD: no +05/03/2016 14:28:12: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +05/03/2016 14:28:12: CUB_PATH: C:\src\cub-1.4.1 +05/03/2016 14:28:12: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +05/03/2016 14:28:12: Build Branch: HEAD +05/03/2016 14:28:12: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +05/03/2016 14:28:12: Built by svcphil on LIANA-09-w +05/03/2016 14:28:12: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +05/03/2016 14:28:12: ------------------------------------------------------------------- -05/03/2016 14:27:35: Running on cntk-muc02 at 2016/05/03 14:27:35 -05/03/2016 14:27:35: Command line: -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +05/03/2016 14:28:12: Running on cntk-muc02 at 2016/05/03 14:28:12 +05/03/2016 14:28:12: Command line: +C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true -05/03/2016 14:27:35: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:35: precision = "float" +05/03/2016 14:28:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:12: precision = "float" command = speechTrain deviceId = $DeviceId$ parallelTrain = false @@ -649,26 +651,25 @@ speechTrain = [ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu 
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 14:27:35: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:35: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:35: precision = "float" +05/03/2016 14:28:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:12: precision = "float" command = speechTrain deviceId = -1 parallelTrain = false makeMode = false speechTrain = [ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn" deviceId = -1 traceLevel = 1 SimpleNetworkBuilder = [ @@ -747,31 +748,30 @@ speechTrain = [ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu DeviceId=-1 timestamping=true makeMode=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 14:27:35: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:35: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:12: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data configparameters: cntk.cntk:deviceId=-1 configparameters: cntk.cntk:makeMode=true -configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +configparameters: 
cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu configparameters: cntk.cntk:parallelTrain=false configparameters: cntk.cntk:precision=float -configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu +configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu configparameters: cntk.cntk:speechTrain=[ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn" deviceId = -1 traceLevel = 1 SimpleNetworkBuilder = [ @@ -848,33 +848,32 @@ configparameters: cntk.cntk:speechTrain=[ labelType = "category" ] ] -] [reader=[readerType=ExperimentalHTKMLFReader]] +] configparameters: cntk.cntk:timestamping=true -05/03/2016 14:27:35: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:35: Commands: speechTrain -05/03/2016 14:27:35: Precision = "float" -05/03/2016 14:27:35: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn -05/03/2016 14:27:35: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 14:27:35: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +05/03/2016 14:28:12: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:12: Commands: speechTrain +05/03/2016 14:28:12: Precision = "float" +05/03/2016 14:28:12: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn +05/03/2016 14:28:12: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 14:28:12: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -05/03/2016 14:27:35: ############################################################################## -05/03/2016 14:27:35: # # -05/03/2016 14:27:35: # Action "train" # -05/03/2016 14:27:35: # # -05/03/2016 14:27:35: ############################################################################## +05/03/2016 14:28:12: ############################################################################## +05/03/2016 14:28:12: # # +05/03/2016 14:28:12: # Action "train" # +05/03/2016 14:28:12: # # +05/03/2016 14:28:12: ############################################################################## -05/03/2016 14:27:35: CNTKCommandTrainBegin: speechTrain +05/03/2016 14:28:12: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using CPU -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... 
total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:27:35: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn.2'. +05/03/2016 14:28:13: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn.2'. Post-processing network... @@ -926,14 +925,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:27:35: Loaded model with 25 nodes on CPU. +05/03/2016 14:28:13: Loaded model with 25 nodes on CPU. -05/03/2016 14:27:35: Training criterion node(s): -05/03/2016 14:27:35: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 14:28:13: Training criterion node(s): +05/03/2016 14:28:13: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 14:27:35: Evaluation criterion node(s): +05/03/2016 14:28:13: Evaluation criterion node(s): -05/03/2016 14:27:35: EvalErrorPrediction = ErrorPrediction +05/03/2016 14:28:13: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -941,46 +940,48 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *1]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *1]] [PosteriorProb Value[132 x 1 x *1]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *1]] [features Gradient[363 x *1]] [labels Gradient[132 x *1]] } -00000090FB2BF8F0: {[B1 Value[512 x 1]] } -00000090FB2BF990: {[B2 Value[132 x 1]] } -00000090FB2BFB70: {[InvStdOfFeatures Value[363]] } -00000090FB2BFF30: {[features Value[363 x *1]] } -00000090FB2C02F0: {[B0 Value[512 x 1]] } -00000090FB2C0430: {[labels Value[132 x *1]] } -00000090FB2C06B0: {[MeanOfFeatures Value[363]] } -00000090FCFB9CA0: {[B2 Gradient[132 x 1]] } -00000090FCFBA240: {[MVNormalizedFeatures Value[363 x *1]] } -00000090FCFBA2E0: {[H1 Value[512 x 1 x *1]] [W0*features Gradient[512 x *1]] } -00000090FCFBA380: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *1]] [W1*H1+B1 Gradient[512 x 1 x *1]] [W2*H1 Value[132 x 1 x *1]] } -00000090FCFBA4C0: {[CrossEntropyWithSoftmax Value[1]] } -00000090FCFBA560: {[W2*H1 Gradient[132 x 1 x *1]] } -00000090FCFBA740: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *1]] [HLast Gradient[132 x 1 x *1]] } -00000090FCFBAA60: {[W0*features+B0 Gradient[512 x 1 x *1]] [W1*H1 Value[512 x 1 x *1]] } -00000090FCFBABA0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *1]] } -00000090FCFBAC40: {[HLast Value[132 x 1 x *1]] [W2 Gradient[132 x 512]] } -00000090FCFBAE20: {[LogOfPrior Value[132]] } -00000090FCFBB140: {[CrossEntropyWithSoftmax Gradient[1]] } -00000090FCFBB1E0: {[W0*features Value[512 x *1]] } -00000090FCFBBA00: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *1]] } -00000090FCFBBAA0: {[H2 Value[512 x 1 x *1]] [W1*H1 Gradient[512 x 1 x *1]] } -00000090FD048780: 
{[EvalErrorPrediction Value[1]] } -00000090FD048820: {[Prior Value[132]] } -00000090FD048AA0: {[ScaledLogLikelihood Value[132 x 1 x *1]] } -00000090FD048BE0: {[W0 Value[512 x 363]] } -00000090FD0492C0: {[W1 Value[512 x 512]] } -00000090FD049360: {[W2 Value[132 x 512]] } +00000022B88865D0: {[ScaledLogLikelihood Value[132 x 1 x *1]] } +00000022B8886670: {[HLast Value[132 x 1 x *1]] [W2 Gradient[132 x 512]] } +00000022B8886710: {[Prior Value[132]] } +00000022B88868F0: {[W0*features Value[512 x *1]] } +00000022B8886990: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *1]] } +00000022B8886AD0: {[H2 Value[512 x 1 x *1]] [W1*H1 Gradient[512 x 1 x *1]] } +00000022B8886CB0: {[LogOfPrior Value[132]] } +00000022B8886DF0: {[W1 Value[512 x 512]] } +00000022B8886E90: {[CrossEntropyWithSoftmax Value[1]] } +00000022B8886F30: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *1]] } +00000022B8887070: {[W2*H1 Gradient[132 x 1 x *1]] } +00000022B8887110: {[B2 Gradient[132 x 1]] } +00000022B8887390: {[EvalErrorPrediction Value[1]] } +00000022B88874D0: {[W0*features+B0 Gradient[512 x 1 x *1]] [W1*H1 Value[512 x 1 x *1]] } +00000022B8887610: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *1]] [W1*H1+B1 Gradient[512 x 1 x *1]] [W2*H1 Value[132 x 1 x *1]] } +00000022B8887750: {[MVNormalizedFeatures Value[363 x *1]] } +00000022B88877F0: {[CrossEntropyWithSoftmax Gradient[1]] } +00000022B8887A70: {[W0 Value[512 x 363]] } +00000022B8887BB0: {[W2 Value[132 x 512]] } +00000022B8887E30: {[H1 Value[512 x 1 x *1]] [W0*features Gradient[512 x *1]] } +00000022B8887ED0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *1]] [HLast Gradient[132 x 1 x *1]] } +00000022BA5F0A70: {[features Value[363 x *1]] } +00000022BA5F0D90: {[MeanOfFeatures Value[363]] } +00000022BA5F0ED0: {[B1 Value[512 x 1]] } +00000022BA5F11F0: {[B2 Value[132 x 1]] } +00000022BA5F1470: {[B0 Value[512 x 1]] } +00000022BA5F15B0: {[InvStdOfFeatures Value[363]] } +00000022BA5F16F0: {[labels Value[132 x *1]] } -05/03/2016 14:27:35: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 14:28:13: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 14:27:35: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 14:28:13: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:27:35: Starting minibatch loop. 
-05/03/2016 14:27:36: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593941 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 0.7204s; samplesPerSecond = 14213.5 -05/03/2016 14:27:37: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384575 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 0.5661s; samplesPerSecond = 18089.8 -05/03/2016 14:27:37: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989258 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.35837s -05/03/2016 14:27:37: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_cpu/models/cntkSpeech.dnn' -05/03/2016 14:27:37: CNTKCommandTrainEnd: speechTrain +05/03/2016 14:28:13: Starting minibatch loop. +05/03/2016 14:28:14: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593941 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 0.6111s; samplesPerSecond = 16756.1 +05/03/2016 14:28:14: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384575 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 0.5890s; samplesPerSecond = 17385.7 +05/03/2016 14:28:14: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989258 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.37182s +05/03/2016 14:28:14: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_cpu/models/cntkSpeech.dnn' +05/03/2016 14:28:14: CNTKCommandTrainEnd: speechTrain -05/03/2016 14:27:37: Action "train" complete. +05/03/2016 14:28:14: Action "train" complete. 
-05/03/2016 14:27:37: __COMPLETED__ \ No newline at end of file +05/03/2016 14:28:14: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.windows.gpu.txt index 473fea418..f9f22e06b 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/baseline.windows.gpu.txt @@ -1,4 +1,4 @@ -=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true ------------------------------------------------------------------- Build info: @@ -16,31 +16,31 @@ Build info: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ ------------------------------------------------------------------- Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -05/03/2016 14:27:38: ------------------------------------------------------------------- -05/03/2016 14:27:38: Build info: +05/03/2016 14:28:15: ------------------------------------------------------------------- +05/03/2016 14:28:15: Build info: -05/03/2016 14:27:38: Built time: May 3 2016 13:23:06 -05/03/2016 14:27:38: Last modified date: Mon Apr 18 00:00:12 2016 -05/03/2016 14:27:38: Build type: Release -05/03/2016 14:27:38: Build target: GPU -05/03/2016 14:27:38: With 1bit-SGD: no -05/03/2016 14:27:38: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -05/03/2016 14:27:38: CUB_PATH: C:\src\cub-1.4.1 -05/03/2016 14:27:38: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -05/03/2016 14:27:38: Build Branch: HEAD -05/03/2016 14:27:38: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -05/03/2016 14:27:38: Built by svcphil on LIANA-09-w -05/03/2016 14:27:38: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -05/03/2016 14:27:38: ------------------------------------------------------------------- +05/03/2016 14:28:15: Built time: May 3 2016 13:23:06 +05/03/2016 14:28:15: Last modified date: Mon Apr 18 00:00:12 2016 +05/03/2016 14:28:15: Build 
type: Release +05/03/2016 14:28:15: Build target: GPU +05/03/2016 14:28:15: With 1bit-SGD: no +05/03/2016 14:28:15: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +05/03/2016 14:28:15: CUB_PATH: C:\src\cub-1.4.1 +05/03/2016 14:28:15: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +05/03/2016 14:28:15: Build Branch: HEAD +05/03/2016 14:28:15: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +05/03/2016 14:28:15: Built by svcphil on LIANA-09-w +05/03/2016 14:28:15: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +05/03/2016 14:28:15: ------------------------------------------------------------------- -05/03/2016 14:27:38: Running on cntk-muc02 at 2016/05/03 14:27:38 -05/03/2016 14:27:38: Command line: -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu DeviceId=0 timestamping=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +05/03/2016 14:28:15: Running on cntk-muc02 at 2016/05/03 14:28:15 +05/03/2016 14:28:15: Command line: +C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true -05/03/2016 14:27:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:38: precision = "float" +05/03/2016 14:28:15: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:15: precision = "float" command = speechTrain deviceId = $DeviceId$ parallelTrain = false @@ -126,25 +126,24 @@ speechTrain = [ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 14:27:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES 
NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:15: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:38: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:38: precision = "float" +05/03/2016 14:28:15: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:15: precision = "float" command = speechTrain deviceId = 0 parallelTrain = false makeMode = false speechTrain = [ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn" deviceId = 0 traceLevel = 1 SimpleNetworkBuilder = [ @@ -223,30 +222,29 @@ speechTrain = [ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 14:27:38: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:15: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:38: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:15: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data configparameters: cntk.cntk:deviceId=0 configparameters: cntk.cntk:makeMode=false -configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu configparameters: cntk.cntk:parallelTrain=false configparameters: cntk.cntk:precision=float -configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu configparameters: cntk.cntk:speechTrain=[ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn" + modelPath = 
"C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn" deviceId = 0 traceLevel = 1 SimpleNetworkBuilder = [ @@ -323,33 +321,32 @@ configparameters: cntk.cntk:speechTrain=[ labelType = "category" ] ] -] [reader=[readerType=ExperimentalHTKMLFReader]] +] configparameters: cntk.cntk:timestamping=true -05/03/2016 14:27:38: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:38: Commands: speechTrain -05/03/2016 14:27:38: Precision = "float" -05/03/2016 14:27:38: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn -05/03/2016 14:27:38: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 14:27:38: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +05/03/2016 14:28:15: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:15: Commands: speechTrain +05/03/2016 14:28:15: Precision = "float" +05/03/2016 14:28:15: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn +05/03/2016 14:28:15: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 14:28:15: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -05/03/2016 14:27:38: ############################################################################## -05/03/2016 14:27:38: # # -05/03/2016 14:27:38: # Action "train" # -05/03/2016 14:27:38: # # -05/03/2016 14:27:38: ############################################################################## +05/03/2016 14:28:15: ############################################################################## +05/03/2016 14:28:15: # # +05/03/2016 14:28:15: # Action "train" # +05/03/2016 14:28:15: # # +05/03/2016 14:28:15: ############################################################################## -05/03/2016 14:27:38: CNTKCommandTrainBegin: speechTrain +05/03/2016 14:28:15: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using GPU 0 -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:27:38: Creating virgin network. +05/03/2016 14:28:16: Creating virgin network. Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 Post-processing network... @@ -402,14 +399,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:27:39: Created model with 25 nodes on GPU 0. 
+05/03/2016 14:28:16: Created model with 25 nodes on GPU 0. -05/03/2016 14:27:39: Training criterion node(s): -05/03/2016 14:27:39: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 14:28:16: Training criterion node(s): +05/03/2016 14:28:16: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 14:27:39: Evaluation criterion node(s): +05/03/2016 14:28:16: Evaluation criterion node(s): -05/03/2016 14:27:39: EvalErrorPrediction = ErrorPrediction +05/03/2016 14:28:16: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -417,112 +414,117 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -0000006AE889CF10: {[features Value[363 x *]] } -0000006AFB2E4BF0: {[B0 Value[512 x 1]] } -0000006AFB2E50F0: {[W2 Value[132 x 512]] } -0000006AFB2E5190: {[W0 Value[512 x 363]] } -0000006AFB2E5730: {[W1 Value[512 x 512]] } -0000006AFB2E5910: {[B2 Value[132 x 1]] } -0000006AFB2E5D70: {[InvStdOfFeatures Value[363]] } -0000006AFB2E5EB0: {[MeanOfFeatures Value[363]] } -0000006AFB2E68B0: {[B1 Value[512 x 1]] } -0000006AFDFC5420: {[MVNormalizedFeatures Value[363 x *]] } -0000006AFDFC5560: {[W0*features Value[512 x *]] } -0000006AFDFC5600: {[EvalErrorPrediction Value[1]] } -0000006AFDFC56A0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -0000006AFDFC5920: {[ScaledLogLikelihood Value[132 x 1 x *]] } -0000006AFDFC59C0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -0000006AFDFC5A60: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -0000006AFDFC5C40: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -0000006AFDFC5CE0: {[labels Value[132 x *]] } -0000006AFDFC5F60: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -0000006AFDFC61E0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -0000006AFDFC6320: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -0000006AFDFC63C0: {[W2*H1 Gradient[132 x 1 x *]] } -0000006AFDFC6460: {[Prior Value[132]] } -0000006AFDFC68C0: {[LogOfPrior Value[132]] } -0000006AFDFC6B40: {[CrossEntropyWithSoftmax Gradient[1]] } -0000006AFDFC6DC0: {[B2 Gradient[132 x 1]] } -0000006AFDFC6FA0: {[CrossEntropyWithSoftmax Value[1]] } -0000006AFDFC7180: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +000000043CE26000: {[features Value[363 x *]] } +0000000454D0FBC0: {[W0 Value[512 x 363]] } +0000000454D0FC60: {[InvStdOfFeatures Value[363]] } +0000000454D0FDA0: {[W1 Value[512 x 512]] } +0000000454D0FE40: {[B1 Value[512 x 1]] } +0000000454D105C0: {[W2 Value[132 x 512]] } +0000000454D10660: {[B2 Value[132 x 1]] } +0000000454D10C00: {[B0 Value[512 x 1]] } +0000000454D10CA0: {[labels Value[132 x *]] } +0000000454D11420: {[MeanOfFeatures Value[363]] } +00000004555C5780: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +00000004555C5820: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +00000004555C5B40: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } 
+00000004555C5BE0: {[B2 Gradient[132 x 1]] } +00000004555C5F00: {[EvalErrorPrediction Value[1]] } +00000004555C5FA0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +00000004555C6220: {[MVNormalizedFeatures Value[363 x *]] } +00000004555C6360: {[CrossEntropyWithSoftmax Value[1]] } +00000004555C6400: {[Prior Value[132]] } +00000004555C6680: {[LogOfPrior Value[132]] } +00000004555C6720: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +00000004555C67C0: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +00000004555C6860: {[CrossEntropyWithSoftmax Gradient[1]] } +00000004555C6900: {[W2*H1 Gradient[132 x 1 x *]] } +00000004555C6F40: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +00000004555C6FE0: {[W0*features Value[512 x *]] } +00000004555C7120: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +00000004555C7300: {[ScaledLogLikelihood Value[132 x 1 x *]] } -05/03/2016 14:27:39: Precomputing --> 3 PreCompute nodes found. +05/03/2016 14:28:16: Precomputing --> 3 PreCompute nodes found. -05/03/2016 14:27:39: MeanOfFeatures = Mean() -05/03/2016 14:27:39: InvStdOfFeatures = InvStdDev() -05/03/2016 14:27:39: Prior = Mean() +05/03/2016 14:28:16: MeanOfFeatures = Mean() +05/03/2016 14:28:16: InvStdOfFeatures = InvStdDev() +05/03/2016 14:28:16: Prior = Mean() +minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:27:41: Precomputing --> Completed. +05/03/2016 14:28:19: Precomputing --> Completed. -05/03/2016 14:27:41: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +05/03/2016 14:28:19: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -05/03/2016 14:27:41: Starting minibatch loop. 
-05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.45645981 * 640; EvalErrorPrediction = 0.92500000 * 640; time = 0.1529s; samplesPerSecond = 4185.9 -05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22315750 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0262s; samplesPerSecond = 24429.3 -05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.95180588 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0266s; samplesPerSecond = 24082.8 -05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.94158096 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0265s; samplesPerSecond = 24185.6 -05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.85668945 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0263s; samplesPerSecond = 24293.0 -05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72866364 * 640; EvalErrorPrediction = 0.89531250 * 640; time = 0.0265s; samplesPerSecond = 24173.7 -05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51809235 * 640; EvalErrorPrediction = 0.82968750 * 640; time = 0.0264s; samplesPerSecond = 24215.8 -05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.48455200 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0264s; samplesPerSecond = 24283.8 -05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33829346 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0262s; samplesPerSecond = 24410.7 -05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.50167236 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0263s; samplesPerSecond = 24317.0 -05/03/2016 14:27:41: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.22861633 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0265s; samplesPerSecond = 24105.5 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.32616882 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.0264s; samplesPerSecond = 24256.2 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.16897583 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.0264s; samplesPerSecond = 24203.9 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.08891907 * 640; EvalErrorPrediction = 0.77656250 * 640; time = 0.0263s; samplesPerSecond = 24377.2 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06005249 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0262s; samplesPerSecond = 24412.6 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91128540 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0262s; samplesPerSecond = 24448.9 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90172119 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0263s; samplesPerSecond = 24344.8 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.73262329 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0262s; samplesPerSecond = 24443.3 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: 
CrossEntropyWithSoftmax = 2.66515503 * 640; EvalErrorPrediction = 0.68437500 * 640; time = 0.0263s; samplesPerSecond = 24335.5 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67383423 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0263s; samplesPerSecond = 24361.5 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.52869263 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0265s; samplesPerSecond = 24141.8 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60032349 * 640; EvalErrorPrediction = 0.66718750 * 640; time = 0.0261s; samplesPerSecond = 24492.9 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51134033 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0264s; samplesPerSecond = 24208.5 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.45362549 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0263s; samplesPerSecond = 24313.3 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.41640015 * 640; EvalErrorPrediction = 0.61562500 * 640; time = 0.0266s; samplesPerSecond = 24077.3 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39745483 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0264s; samplesPerSecond = 24270.9 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.16415405 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.0260s; samplesPerSecond = 24658.1 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.30347290 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0265s; samplesPerSecond = 24165.5 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.24398804 * 640; EvalErrorPrediction = 0.60937500 * 640; time = 0.0268s; samplesPerSecond = 23920.8 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15322266 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.0258s; samplesPerSecond = 24838.9 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.21664429 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.0262s; samplesPerSecond = 24463.0 -05/03/2016 14:27:42: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25246582 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.0265s; samplesPerSecond = 24117.3 -05/03/2016 14:27:42: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00000324 * 20480; EvalErrorPrediction = 0.72836914 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=1.03095s -05/03/2016 14:27:42: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn.1' +05/03/2016 14:28:19: Starting minibatch loop. 
+05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.45645981 * 640; EvalErrorPrediction = 0.92500000 * 640; time = 0.0296s; samplesPerSecond = 21654.5 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22315750 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0240s; samplesPerSecond = 26626.7 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.95180588 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0241s; samplesPerSecond = 26585.8 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.94158096 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0241s; samplesPerSecond = 26582.5 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.85668945 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0241s; samplesPerSecond = 26574.8 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72866364 * 640; EvalErrorPrediction = 0.89531250 * 640; time = 0.0241s; samplesPerSecond = 26528.5 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51809235 * 640; EvalErrorPrediction = 0.82968750 * 640; time = 0.0241s; samplesPerSecond = 26588.0 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.48455200 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0240s; samplesPerSecond = 26621.2 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33829346 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0241s; samplesPerSecond = 26527.4 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.50167236 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0237s; samplesPerSecond = 27056.7 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.22861633 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0232s; samplesPerSecond = 27619.5 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.32616882 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.0245s; samplesPerSecond = 26152.3 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.16897583 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.0242s; samplesPerSecond = 26490.1 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.08891907 * 640; EvalErrorPrediction = 0.77656250 * 640; time = 0.0235s; samplesPerSecond = 27212.0 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06005249 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0235s; samplesPerSecond = 27193.5 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91128540 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0236s; samplesPerSecond = 27123.2 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90172119 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0235s; samplesPerSecond = 27176.2 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.73262329 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0236s; samplesPerSecond = 27103.7 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: 
CrossEntropyWithSoftmax = 2.66515503 * 640; EvalErrorPrediction = 0.68437500 * 640; time = 0.0235s; samplesPerSecond = 27205.1 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67383423 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0235s; samplesPerSecond = 27186.6 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.52869263 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0235s; samplesPerSecond = 27192.4 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60032349 * 640; EvalErrorPrediction = 0.66718750 * 640; time = 0.0235s; samplesPerSecond = 27183.1 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51134033 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0235s; samplesPerSecond = 27257.2 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.45362549 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0234s; samplesPerSecond = 27330.6 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.41640015 * 640; EvalErrorPrediction = 0.61562500 * 640; time = 0.0236s; samplesPerSecond = 27131.3 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39745483 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0235s; samplesPerSecond = 27176.2 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.16415405 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.0230s; samplesPerSecond = 27788.6 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.30347290 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0226s; samplesPerSecond = 28296.0 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.24398804 * 640; EvalErrorPrediction = 0.60937500 * 640; time = 0.0226s; samplesPerSecond = 28324.9 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15322266 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.0227s; samplesPerSecond = 28232.4 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.21664429 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.0227s; samplesPerSecond = 28183.9 +05/03/2016 14:28:19: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25246582 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.0226s; samplesPerSecond = 28269.8 +05/03/2016 14:28:19: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00000324 * 20480; EvalErrorPrediction = 0.72836914 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=0.761972s +05/03/2016 14:28:20: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn.1' -05/03/2016 14:27:42: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +05/03/2016 14:28:20: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 14:27:42: Starting minibatch loop. 
-05/03/2016 14:27:42: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08151951 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.0479s; samplesPerSecond = 53475.9 -05/03/2016 14:27:42: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98395710 * 2560; EvalErrorPrediction = 0.54257813 * 2560; time = 0.0404s; samplesPerSecond = 63375.7 -05/03/2016 14:27:42: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98575516 * 2560; EvalErrorPrediction = 0.54492188 * 2560; time = 0.0404s; samplesPerSecond = 63397.7 -05/03/2016 14:27:42: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.90485039 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0403s; samplesPerSecond = 63514.1 -05/03/2016 14:27:42: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.88324280 * 2560; EvalErrorPrediction = 0.52539063 * 2560; time = 0.0402s; samplesPerSecond = 63741.8 -05/03/2016 14:27:42: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.89109344 * 2560; EvalErrorPrediction = 0.53359375 * 2560; time = 0.0398s; samplesPerSecond = 64350.7 -05/03/2016 14:27:42: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89496002 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.0407s; samplesPerSecond = 62834.4 -05/03/2016 14:27:42: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.85944366 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.0398s; samplesPerSecond = 64384.7 -05/03/2016 14:27:42: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.93560276 * 20480; EvalErrorPrediction = 0.53603516 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.331755s -05/03/2016 14:27:42: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn.2' +05/03/2016 14:28:20: Starting minibatch loop. 
+05/03/2016 14:28:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08151951 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.0458s; samplesPerSecond = 55926.9 +05/03/2016 14:28:20: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98395710 * 2560; EvalErrorPrediction = 0.54257813 * 2560; time = 0.0387s; samplesPerSecond = 66165.3 +05/03/2016 14:28:20: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98575516 * 2560; EvalErrorPrediction = 0.54492188 * 2560; time = 0.0386s; samplesPerSecond = 66269.7 +05/03/2016 14:28:20: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.90485039 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0386s; samplesPerSecond = 66355.6 +05/03/2016 14:28:20: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.88324280 * 2560; EvalErrorPrediction = 0.52539063 * 2560; time = 0.0390s; samplesPerSecond = 65585.5 +05/03/2016 14:28:20: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.89109344 * 2560; EvalErrorPrediction = 0.53359375 * 2560; time = 0.0391s; samplesPerSecond = 65451.4 +05/03/2016 14:28:20: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89496002 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.0388s; samplesPerSecond = 65957.3 +05/03/2016 14:28:20: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.85944366 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.0391s; samplesPerSecond = 65478.2 +05/03/2016 14:28:20: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.93560276 * 20480; EvalErrorPrediction = 0.53603516 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.319428s +05/03/2016 14:28:20: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn.2' -05/03/2016 14:27:43: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 14:28:20: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses -05/03/2016 14:27:43: Starting minibatch loop. -05/03/2016 14:27:43: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752853 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.1328s; samplesPerSecond = 77108.4 -05/03/2016 14:27:43: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358780 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1140s; samplesPerSecond = 89788.3 -05/03/2016 14:27:43: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055817 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.254239s -05/03/2016 14:27:43: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn' -05/03/2016 14:27:43: CNTKCommandTrainEnd: speechTrain +05/03/2016 14:28:20: Starting minibatch loop. 
+05/03/2016 14:28:20: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752853 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.1273s; samplesPerSecond = 80424.7 +05/03/2016 14:28:20: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358780 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1127s; samplesPerSecond = 90826.8 +05/03/2016 14:28:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055817 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.244511s +05/03/2016 14:28:20: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn' +05/03/2016 14:28:20: CNTKCommandTrainEnd: speechTrain -05/03/2016 14:27:43: Action "train" complete. +05/03/2016 14:28:20: Action "train" complete. -05/03/2016 14:27:43: __COMPLETED__ +05/03/2016 14:28:20: __COMPLETED__ === Deleting last epoch data ==== Re-running from checkpoint -=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true ------------------------------------------------------------------- Build info: @@ -540,31 +542,31 @@ Build info: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ ------------------------------------------------------------------- Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -05/03/2016 14:27:44: ------------------------------------------------------------------- -05/03/2016 14:27:44: Build info: +05/03/2016 14:28:21: ------------------------------------------------------------------- +05/03/2016 14:28:21: Build info: -05/03/2016 14:27:44: Built time: May 3 2016 13:23:06 -05/03/2016 14:27:44: Last modified date: Mon Apr 18 00:00:12 2016 -05/03/2016 14:27:44: Build type: Release -05/03/2016 14:27:44: Build target: GPU -05/03/2016 14:27:44: With 1bit-SGD: no -05/03/2016 14:27:44: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -05/03/2016 14:27:44: CUB_PATH: 
C:\src\cub-1.4.1 -05/03/2016 14:27:44: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -05/03/2016 14:27:44: Build Branch: HEAD -05/03/2016 14:27:44: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -05/03/2016 14:27:44: Built by svcphil on LIANA-09-w -05/03/2016 14:27:44: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -05/03/2016 14:27:44: ------------------------------------------------------------------- +05/03/2016 14:28:21: Built time: May 3 2016 13:23:06 +05/03/2016 14:28:21: Last modified date: Mon Apr 18 00:00:12 2016 +05/03/2016 14:28:21: Build type: Release +05/03/2016 14:28:21: Build target: GPU +05/03/2016 14:28:21: With 1bit-SGD: no +05/03/2016 14:28:21: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +05/03/2016 14:28:21: CUB_PATH: C:\src\cub-1.4.1 +05/03/2016 14:28:21: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +05/03/2016 14:28:21: Build Branch: HEAD +05/03/2016 14:28:21: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +05/03/2016 14:28:21: Built by svcphil on LIANA-09-w +05/03/2016 14:28:21: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +05/03/2016 14:28:21: ------------------------------------------------------------------- -05/03/2016 14:27:44: Running on cntk-muc02 at 2016/05/03 14:27:44 -05/03/2016 14:27:44: Command line: -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] +05/03/2016 14:28:21: Running on cntk-muc02 at 2016/05/03 14:28:21 +05/03/2016 14:28:21: Command line: +C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true -05/03/2016 14:27:44: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:44: precision = "float" +05/03/2016 14:28:21: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:21: precision = "float" command = speechTrain deviceId = $DeviceId$ parallelTrain = false @@ -650,26 +652,25 @@ speechTrain = [ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu 
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 14:27:44: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:21: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:44: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:44: precision = "float" +05/03/2016 14:28:21: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:21: precision = "float" command = speechTrain deviceId = 0 parallelTrain = false makeMode = false speechTrain = [ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn" deviceId = 0 traceLevel = 1 SimpleNetworkBuilder = [ @@ -748,31 +749,30 @@ speechTrain = [ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu DeviceId=0 timestamping=true makeMode=true -speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]] -05/03/2016 14:27:44: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:21: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:44: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:21: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\QuickE2E configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data configparameters: cntk.cntk:deviceId=0 configparameters: cntk.cntk:makeMode=true -configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +configparameters: 
cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu configparameters: cntk.cntk:parallelTrain=false configparameters: cntk.cntk:precision=float -configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu +configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu configparameters: cntk.cntk:speechTrain=[ action = "train" - modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn" + modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn" deviceId = 0 traceLevel = 1 SimpleNetworkBuilder = [ @@ -849,33 +849,32 @@ configparameters: cntk.cntk:speechTrain=[ labelType = "category" ] ] -] [reader=[readerType=ExperimentalHTKMLFReader]] +] configparameters: cntk.cntk:timestamping=true -05/03/2016 14:27:44: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:44: Commands: speechTrain -05/03/2016 14:27:44: Precision = "float" -05/03/2016 14:27:44: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn -05/03/2016 14:27:44: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 14:27:44: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +05/03/2016 14:28:21: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:21: Commands: speechTrain +05/03/2016 14:28:21: Precision = "float" +05/03/2016 14:28:21: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn +05/03/2016 14:28:21: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 14:28:21: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -05/03/2016 14:27:44: ############################################################################## -05/03/2016 14:27:44: # # -05/03/2016 14:27:44: # Action "train" # -05/03/2016 14:27:44: # # -05/03/2016 14:27:44: ############################################################################## +05/03/2016 14:28:21: ############################################################################## +05/03/2016 14:28:21: # # +05/03/2016 14:28:21: # Action "train" # +05/03/2016 14:28:21: # # +05/03/2016 14:28:21: ############################################################################## -05/03/2016 14:27:44: CNTKCommandTrainBegin: speechTrain +05/03/2016 14:28:21: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using GPU 0 -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... 
total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:27:44: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn.2'. +05/03/2016 14:28:22: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn.2'. Post-processing network... @@ -927,14 +926,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:27:45: Loaded model with 25 nodes on GPU 0. +05/03/2016 14:28:22: Loaded model with 25 nodes on GPU 0. -05/03/2016 14:27:45: Training criterion node(s): -05/03/2016 14:27:45: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 14:28:22: Training criterion node(s): +05/03/2016 14:28:22: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 14:27:45: Evaluation criterion node(s): +05/03/2016 14:28:22: Evaluation criterion node(s): -05/03/2016 14:27:45: EvalErrorPrediction = ErrorPrediction +05/03/2016 14:28:22: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -942,46 +941,48 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *1]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *1]] [PosteriorProb Value[132 x 1 x *1]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *1]] [features Gradient[363 x *1]] [labels Gradient[132 x *1]] } -000000F1BF6A0110: {[B0 Value[512 x 1]] } -000000F1DA5A20C0: {[W0 Value[512 x 363]] } -000000F1DA5A22A0: {[W1 Value[512 x 512]] } -000000F1DA5A23E0: {[Prior Value[132]] } -000000F1DA5A2700: {[InvStdOfFeatures Value[363]] } -000000F1DA5A2A20: {[W2 Value[132 x 512]] } -000000F1DA5A32E0: {[B1 Value[512 x 1]] } -000000F1DA5A3380: {[labels Value[132 x *1]] } -000000F1DA5A3420: {[B2 Value[132 x 1]] } -000000F1DA5A3BA0: {[features Value[363 x *1]] } -000000F1DA5A3E20: {[MeanOfFeatures Value[363]] } -000000F1DA991C00: {[CrossEntropyWithSoftmax Value[1]] } -000000F1DA991CA0: {[HLast Value[132 x 1 x *1]] [W2 Gradient[132 x 512]] } -000000F1DA991DE0: {[ScaledLogLikelihood Value[132 x 1 x *1]] } -000000F1DA991F20: {[W0*features Value[512 x *1]] } -000000F1DA991FC0: {[W2*H1 Gradient[132 x 1 x *1]] } -000000F1DA9922E0: {[EvalErrorPrediction Value[1]] } -000000F1DA992380: {[MVNormalizedFeatures Value[363 x *1]] } -000000F1DA992420: {[H1 Value[512 x 1 x *1]] [W0*features Gradient[512 x *1]] } -000000F1DA9926A0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *1]] } -000000F1DA992880: {[CrossEntropyWithSoftmax Gradient[1]] } -000000F1DA992BA0: {[B2 Gradient[132 x 1]] } -000000F1DA993460: {[H2 Value[512 x 1 x *1]] [W1*H1 Gradient[512 x 1 x *1]] } -000000F1DA993500: {[LogOfPrior Value[132]] } -000000F1DA9935A0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *1]] } -000000F1DA993640: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 
x *1]] [W1*H1+B1 Gradient[512 x 1 x *1]] [W2*H1 Value[132 x 1 x *1]] } -000000F1DA993780: {[W0*features+B0 Gradient[512 x 1 x *1]] [W1*H1 Value[512 x 1 x *1]] } -000000F1DA9938C0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *1]] [HLast Gradient[132 x 1 x *1]] } +0000006E90AC0ED0: {[B0 Value[512 x 1]] } +0000006EA6C56760: {[MeanOfFeatures Value[363]] } +0000006EA6C56C60: {[labels Value[132 x *1]] } +0000006EA6C570C0: {[W2 Value[132 x 512]] } +0000006EA6C57660: {[Prior Value[132]] } +0000006EA6C57840: {[W1 Value[512 x 512]] } +0000006EA6C57CA0: {[features Value[363 x *1]] } +0000006EA6C57D40: {[InvStdOfFeatures Value[363]] } +0000006EA6C57E80: {[B1 Value[512 x 1]] } +0000006EA6C58100: {[W0 Value[512 x 363]] } +0000006EA6C58240: {[B2 Value[132 x 1]] } +0000006EA9093090: {[H1 Value[512 x 1 x *1]] [W0*features Gradient[512 x *1]] } +0000006EA90934F0: {[CrossEntropyWithSoftmax Value[1]] } +0000006EA90936D0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *1]] } +0000006EA9093BD0: {[HLast Value[132 x 1 x *1]] [W2 Gradient[132 x 512]] } +0000006EA9093DB0: {[W0*features Value[512 x *1]] } +0000006EA9093E50: {[W0*features+B0 Gradient[512 x 1 x *1]] [W1*H1 Value[512 x 1 x *1]] } +0000006EA9094350: {[B2 Gradient[132 x 1]] } +0000006EA90943F0: {[LogOfPrior Value[132]] } +0000006EA9094490: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *1]] [W1*H1+B1 Gradient[512 x 1 x *1]] [W2*H1 Value[132 x 1 x *1]] } +0000006EA9094530: {[EvalErrorPrediction Value[1]] } +0000006EA9094710: {[H2 Value[512 x 1 x *1]] [W1*H1 Gradient[512 x 1 x *1]] } +0000006EA9094850: {[CrossEntropyWithSoftmax Gradient[1]] } +0000006EA90948F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *1]] [HLast Gradient[132 x 1 x *1]] } +0000006EA9094990: {[W2*H1 Gradient[132 x 1 x *1]] } +0000006EA9094A30: {[MVNormalizedFeatures Value[363 x *1]] } +0000006EA9094B70: {[ScaledLogLikelihood Value[132 x 1 x *1]] } +0000006EA9094DF0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *1]] } -05/03/2016 14:27:45: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 14:28:22: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 14:27:45: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 14:28:22: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:27:45: Starting minibatch loop. 
-05/03/2016 14:27:45: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752853 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.4191s; samplesPerSecond = 24435.7 -05/03/2016 14:27:45: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358780 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1130s; samplesPerSecond = 90633.9 -05/03/2016 14:27:45: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055817 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.605847s -05/03/2016 14:27:45: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_QuickE2E@release_gpu/models/cntkSpeech.dnn' -05/03/2016 14:27:45: CNTKCommandTrainEnd: speechTrain +05/03/2016 14:28:22: Starting minibatch loop. +05/03/2016 14:28:22: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752853 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.3019s; samplesPerSecond = 33920.3 +05/03/2016 14:28:23: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358780 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1132s; samplesPerSecond = 90454.6 +05/03/2016 14:28:23: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055817 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.577219s +05/03/2016 14:28:23: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_QuickE2E@release_gpu/models/cntkSpeech.dnn' +05/03/2016 14:28:23: CNTKCommandTrainEnd: speechTrain -05/03/2016 14:27:45: Action "train" complete. +05/03/2016 14:28:23: Action "train" complete. -05/03/2016 14:27:45: __COMPLETED__ \ No newline at end of file +05/03/2016 14:28:23: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/run-test b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/run-test index 5b2f6432b..f341f16b3 100755 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/run-test +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/QuickE2E/run-test @@ -5,6 +5,12 @@ OriginalTestDir=../../QuickE2E ConfigDir=$TEST_DIR/$OriginalTestDir +(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) +if [ $? != 0 ]; then + echo Error: Baselines must match original test. Copy from $OriginalTestDir. + exit 1 +fi + # cntkrun DeleteModelsAfterTest=0 cntkrun cntk.cntk 'speechTrain=[reader=[readerType=ExperimentalHTKMLFReader]]' || exit $? 
diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.cpu.txt index 6842f6df2..cf7934d9a 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.cpu.txt @@ -1,4 +1,4 @@ -=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu DeviceId=-1 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu DeviceId=-1 timestamping=true ------------------------------------------------------------------- Build info: @@ -17,32 +17,32 @@ Build info: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux ------------------------------------------------------------------- Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -05/03/2016 18:18:17: ------------------------------------------------------------------- -05/03/2016 18:18:17: Build info: +05/03/2016 18:18:48: ------------------------------------------------------------------- +05/03/2016 18:18:48: Build info: -05/03/2016 18:18:17: Built time: May 3 2016 17:56:15 -05/03/2016 18:18:17: Last modified date: Tue May 3 11:36:22 2016 -05/03/2016 18:18:17: Build type: release -05/03/2016 18:18:17: Build target: GPU -05/03/2016 18:18:17: With 1bit-SGD: no -05/03/2016 18:18:17: Math lib: acml -05/03/2016 18:18:17: CUDA_PATH: /usr/local/cuda-7.5 -05/03/2016 18:18:17: CUB_PATH: /usr/local/cub-1.4.1 -05/03/2016 18:18:17: CUDNN_PATH: /usr/local/cudnn-4.0 -05/03/2016 18:18:17: Build Branch: HEAD -05/03/2016 18:18:17: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -05/03/2016 18:18:17: Built by philly on 18750d26eb32 -05/03/2016 18:18:17: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -05/03/2016 18:18:17: ------------------------------------------------------------------- +05/03/2016 18:18:48: Built time: May 3 2016 17:56:15 +05/03/2016 18:18:48: Last modified date: Tue May 3 11:36:22 2016 +05/03/2016 18:18:48: Build type: release +05/03/2016 18:18:48: Build target: GPU +05/03/2016 18:18:48: With 1bit-SGD: no +05/03/2016 18:18:48: Math lib: acml +05/03/2016 18:18:48: CUDA_PATH: /usr/local/cuda-7.5 
+05/03/2016 18:18:48: CUB_PATH: /usr/local/cub-1.4.1 +05/03/2016 18:18:48: CUDNN_PATH: /usr/local/cudnn-4.0 +05/03/2016 18:18:48: Build Branch: HEAD +05/03/2016 18:18:48: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +05/03/2016 18:18:48: Built by philly on 18750d26eb32 +05/03/2016 18:18:48: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +05/03/2016 18:18:48: ------------------------------------------------------------------- -05/03/2016 18:18:17: Running on localhost at 2016/05/03 18:18:17 -05/03/2016 18:18:17: Command line: -/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu DeviceId=-1 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +05/03/2016 18:18:48: Running on localhost at 2016/05/03 18:18:48 +05/03/2016 18:18:48: Command line: +/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu DeviceId=-1 timestamping=true -05/03/2016 18:18:17: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:17: precision=float +05/03/2016 18:18:48: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:48: precision=float command=speechTrain:modelDecomposition:SVDTrain deviceId=$DeviceId$ speechTrain=[ @@ -120,25 +120,23 @@ SVDTrain=[ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu DeviceId=-1 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 18:18:17: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:48: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:17: 
>>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:17: precision=float +05/03/2016 18:18:48: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:48: precision=float command=speechTrain:modelDecomposition:SVDTrain deviceId=-1 speechTrain=[ action=train makeMode=false - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.dnn deviceId=-1 traceLevel=1 SimpleNetworkBuilder=[ @@ -183,19 +181,19 @@ reader=[ ] modelDecomposition=[ action=SVD - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn - outputmodelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn.0 + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.dnn + outputmodelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn.0 KeepRatio=0.5 NodeNameRegex=W.* ] SVDTrain=[ action=train makeMode=true - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn deviceId=-1 traceLevel=1 NDLNetworkBuilder=[ - NetworkDescription=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/nonExistent.ndl + NetworkDescription=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/nonExistent.ndl ] SGD=[ epochSize=20480 @@ -210,32 +208,30 @@ SVDTrain=[ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu DeviceId=-1 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 18:18:17: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:48: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:17: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:48: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain:modelDecomposition:SVDTrain -configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD +configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD configparameters: cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: 
cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:deviceId=-1 configparameters: cntk.cntk:modelDecomposition=[ action=SVD - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn - outputmodelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn.0 + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.dnn + outputmodelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn.0 KeepRatio=0.5 NodeNameRegex=W.* ] -configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu +configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu configparameters: cntk.cntk:precision=float configparameters: cntk.cntk:reader=[ readerType=HTKMLFReader @@ -254,13 +250,13 @@ configparameters: cntk.cntk:reader=[ labelDim=132 labelType=Category ] -] [readerType=ExperimentalHTKMLFReader] [prefetch=true] +] -configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu +configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu configparameters: cntk.cntk:speechTrain=[ action=train makeMode=false - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.dnn deviceId=-1 traceLevel=1 SimpleNetworkBuilder=[ @@ -289,11 +285,11 @@ configparameters: cntk.cntk:speechTrain=[ configparameters: cntk.cntk:SVDTrain=[ action=train makeMode=true - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn deviceId=-1 traceLevel=1 NDLNetworkBuilder=[ - NetworkDescription=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/nonExistent.ndl + NetworkDescription=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/nonExistent.ndl ] SGD=[ epochSize=20480 @@ -309,32 +305,31 @@ configparameters: cntk.cntk:SVDTrain=[ ] configparameters: cntk.cntk:timestamping=true -05/03/2016 18:18:17: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:17: Commands: speechTrain modelDecomposition SVDTrain -05/03/2016 18:18:17: Precision = "float" -05/03/2016 18:18:17: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn -05/03/2016 18:18:17: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 18:18:17: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn -05/03/2016 18:18:17: CNTKCommandTrainInfo: SVDTrain : 2 -05/03/2016 18:18:17: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 5 +05/03/2016 18:18:48: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:48: Commands: speechTrain modelDecomposition SVDTrain +05/03/2016 18:18:48: Precision = "float" +05/03/2016 18:18:48: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.dnn +05/03/2016 18:18:48: CNTKCommandTrainInfo: 
speechTrain : 3 +05/03/2016 18:18:48: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn +05/03/2016 18:18:48: CNTKCommandTrainInfo: SVDTrain : 2 +05/03/2016 18:18:48: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 5 -05/03/2016 18:18:17: ############################################################################## -05/03/2016 18:18:17: # # -05/03/2016 18:18:17: # Action "train" # -05/03/2016 18:18:17: # # -05/03/2016 18:18:17: ############################################################################## +05/03/2016 18:18:48: ############################################################################## +05/03/2016 18:18:48: # # +05/03/2016 18:18:48: # Action "train" # +05/03/2016 18:18:48: # # +05/03/2016 18:18:48: ############################################################################## -05/03/2016 18:18:17: CNTKCommandTrainBegin: speechTrain +05/03/2016 18:18:48: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using CPU -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:18:17: Creating virgin network. +05/03/2016 18:18:48: Creating virgin network. Post-processing network... @@ -386,14 +381,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:18:17: Created model with 25 nodes on CPU. +05/03/2016 18:18:48: Created model with 25 nodes on CPU. -05/03/2016 18:18:17: Training criterion node(s): -05/03/2016 18:18:17: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 18:18:48: Training criterion node(s): +05/03/2016 18:18:48: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 18:18:17: Evaluation criterion node(s): +05/03/2016 18:18:48: Evaluation criterion node(s): -05/03/2016 18:18:17: EvalErrorPrediction = ErrorPrediction +05/03/2016 18:18:48: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -401,114 +396,119 @@ Allocating matrices for forward and/or backward propagation. 
Memory Sharing Structure: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -0x19fea88: {[B1 Value[512 x 1]] } -0x19ffb48: {[InvStdOfFeatures Value[363]] } -0x1a17708: {[EvalErrorPrediction Value[1]] } -0x1a1bf08: {[B2 Value[132 x 1]] } -0x1a3eef8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -0x1a3f0b8: {[W2*H1 Gradient[132 x 1 x *]] } -0x1a3f278: {[B2 Gradient[132 x 1]] } -0x1a51448: {[W2 Value[132 x 512]] } -0x1a91628: {[B0 Value[512 x 1]] } -0x1a91e48: {[CrossEntropyWithSoftmax Gradient[1]] } -0x1a92398: {[W0*features Value[512 x *]] } -0x1ac6238: {[features Value[363 x *]] } -0x1aeeb58: {[ScaledLogLikelihood Value[132 x 1 x *]] } -0x1aeed18: {[CrossEntropyWithSoftmax Value[1]] } -0x1afb268: {[W0 Value[512 x 363]] } -0x1b02208: {[labels Value[132 x *]] } -0x1b10c68: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -0x1b10dc8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -0x1b10f88: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -0x1b1d5f8: {[MeanOfFeatures Value[363]] } -0x1b1ef28: {[W1 Value[512 x 512]] } -0x1b29d38: {[MVNormalizedFeatures Value[363 x *]] } -0x1b29e68: {[LogOfPrior Value[132]] } -0x1b2a0c8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -0x1b2a288: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -0x1b2a448: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -0x1b2a608: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -0x1b30798: {[Prior Value[132]] } +0x26812f8: {[InvStdOfFeatures Value[363]] } +0x26816e8: {[B1 Value[512 x 1]] } +0x2682478: {[W2 Value[132 x 512]] } +0x2687368: {[W0 Value[512 x 363]] } +0x26d4148: {[B2 Value[132 x 1]] } +0x26ebd38: {[labels Value[132 x *]] } +0x26f7e38: {[Prior Value[132]] } +0x2706328: {[B0 Value[512 x 1]] } +0x2712338: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +0x2712548: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +0x2712708: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +0x27128c8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +0x2712a88: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +0x2779448: {[EvalErrorPrediction Value[1]] } +0x27985f8: {[CrossEntropyWithSoftmax Gradient[1]] } +0x27987b8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +0x2798978: {[W2*H1 Gradient[132 x 1 x *]] } +0x2798b38: {[B2 Gradient[132 x 1]] } +0x279a308: {[ScaledLogLikelihood Value[132 x 1 x *]] } +0x279a4c8: {[CrossEntropyWithSoftmax Value[1]] } +0x27a2d68: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +0x27a2f28: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +0x27a37e8: {[features Value[363 x *]] } +0x27a7398: {[MVNormalizedFeatures Value[363 x *]] } +0x27a8648: {[MeanOfFeatures Value[363]] } +0x27ab478: {[W0*features Value[512 x *]] } +0x27ab578: {[LogOfPrior Value[132]] } +0x27ac578: {[W1 Value[512 x 512]] } -05/03/2016 18:18:17: Precomputing --> 3 PreCompute nodes found. +05/03/2016 18:18:48: Precomputing --> 3 PreCompute nodes found. 
-05/03/2016 18:18:17: MeanOfFeatures = Mean() -05/03/2016 18:18:17: InvStdOfFeatures = InvStdDev() -05/03/2016 18:18:17: Prior = Mean() +05/03/2016 18:18:48: MeanOfFeatures = Mean() +05/03/2016 18:18:48: InvStdOfFeatures = InvStdDev() +05/03/2016 18:18:48: Prior = Mean() +minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:18:21: Precomputing --> Completed. +05/03/2016 18:18:50: Precomputing --> Completed. -05/03/2016 18:18:21: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +05/03/2016 18:18:50: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:21: Starting minibatch loop. -05/03/2016 18:18:21: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.39181900 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.2095s; samplesPerSecond = 3054.4 -05/03/2016 18:18:21: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.16675568 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.1293s; samplesPerSecond = 4950.0 -05/03/2016 18:18:21: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98684082 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0976s; samplesPerSecond = 6557.0 -05/03/2016 18:18:21: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86595383 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.1131s; samplesPerSecond = 5659.8 -05/03/2016 18:18:22: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.81007080 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.1064s; samplesPerSecond = 6017.9 -05/03/2016 18:18:22: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73428192 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.1317s; samplesPerSecond = 4858.9 -05/03/2016 18:18:22: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57475586 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.1655s; samplesPerSecond = 3867.1 -05/03/2016 18:18:22: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.43591919 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.1053s; samplesPerSecond = 6076.7 -05/03/2016 18:18:22: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36042786 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.0866s; samplesPerSecond = 7394.3 -05/03/2016 18:18:22: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39383850 * 640; EvalErrorPrediction = 0.85156250 * 640; time = 0.1091s; samplesPerSecond = 5866.1 -05/03/2016 18:18:22: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25078430 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.0979s; samplesPerSecond = 6539.2 -05/03/2016 18:18:22: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35325317 * 640; EvalErrorPrediction = 0.79375000 * 640; time = 0.1206s; samplesPerSecond = 5308.6 -05/03/2016 18:18:23: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19606934 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.1278s; samplesPerSecond = 5008.3 -05/03/2016 18:18:23: Epoch[ 1 of 
3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06110535 * 640; EvalErrorPrediction = 0.73125000 * 640; time = 0.1168s; samplesPerSecond = 5481.7 -05/03/2016 18:18:23: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05118713 * 640; EvalErrorPrediction = 0.75625000 * 640; time = 0.1017s; samplesPerSecond = 6295.6 -05/03/2016 18:18:23: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02474365 * 640; EvalErrorPrediction = 0.74062500 * 640; time = 0.0841s; samplesPerSecond = 7607.5 -05/03/2016 18:18:23: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89902954 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.1122s; samplesPerSecond = 5702.6 -05/03/2016 18:18:23: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.75173340 * 640; EvalErrorPrediction = 0.68125000 * 640; time = 0.1124s; samplesPerSecond = 5694.2 -05/03/2016 18:18:23: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83969116 * 640; EvalErrorPrediction = 0.71875000 * 640; time = 0.1208s; samplesPerSecond = 5297.3 -05/03/2016 18:18:23: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62870483 * 640; EvalErrorPrediction = 0.65468750 * 640; time = 0.1089s; samplesPerSecond = 5879.5 -05/03/2016 18:18:23: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.66655273 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.1413s; samplesPerSecond = 4529.6 -05/03/2016 18:18:24: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61327515 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.1145s; samplesPerSecond = 5591.8 -05/03/2016 18:18:24: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53099976 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0968s; samplesPerSecond = 6614.9 -05/03/2016 18:18:24: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.43747559 * 640; EvalErrorPrediction = 0.64375000 * 640; time = 0.0992s; samplesPerSecond = 6454.5 -05/03/2016 18:18:24: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.41107178 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.1009s; samplesPerSecond = 6344.5 -05/03/2016 18:18:24: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48898926 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.1071s; samplesPerSecond = 5974.1 -05/03/2016 18:18:24: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34965820 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.2658s; samplesPerSecond = 2407.4 -05/03/2016 18:18:24: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.23708496 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.1114s; samplesPerSecond = 5742.7 -05/03/2016 18:18:24: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.33135376 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.1401s; samplesPerSecond = 4568.2 -05/03/2016 18:18:25: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21607666 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.1032s; samplesPerSecond = 6201.2 -05/03/2016 18:18:25: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29110107 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.1003s; samplesPerSecond = 6383.9 -05/03/2016 18:18:25: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.20535278 * 640; 
EvalErrorPrediction = 0.57500000 * 640; time = 0.1027s; samplesPerSecond = 6232.8 -05/03/2016 18:18:25: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01737366 * 20480; EvalErrorPrediction = 0.73061523 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.90952s -05/03/2016 18:18:25: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn.1' +05/03/2016 18:18:50: Starting minibatch loop. +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.39181900 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0607s; samplesPerSecond = 10541.4 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.16675568 * 640; EvalErrorPrediction = 0.87187500 * 640; time = 0.0527s; samplesPerSecond = 12148.6 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98684082 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0503s; samplesPerSecond = 12719.4 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86595383 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0503s; samplesPerSecond = 12713.5 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.81007080 * 640; EvalErrorPrediction = 0.88593750 * 640; time = 0.0498s; samplesPerSecond = 12859.9 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.73428192 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0593s; samplesPerSecond = 10792.2 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.57475586 * 640; EvalErrorPrediction = 0.81875000 * 640; time = 0.0536s; samplesPerSecond = 11932.5 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.43591919 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0517s; samplesPerSecond = 12377.9 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.36042786 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.0503s; samplesPerSecond = 12716.8 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39383850 * 640; EvalErrorPrediction = 0.85156250 * 640; time = 0.0522s; samplesPerSecond = 12250.2 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.25078430 * 640; EvalErrorPrediction = 0.76406250 * 640; time = 0.0505s; samplesPerSecond = 12668.3 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.35325317 * 640; EvalErrorPrediction = 0.79375000 * 640; time = 0.0506s; samplesPerSecond = 12655.5 +05/03/2016 18:18:50: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.19606934 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0627s; samplesPerSecond = 10213.4 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.06110535 * 640; EvalErrorPrediction = 0.73125000 * 640; time = 0.1011s; samplesPerSecond = 6329.4 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.05118713 * 640; EvalErrorPrediction = 0.75625000 * 640; time = 0.0517s; samplesPerSecond = 12387.3 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.02474365 * 640; EvalErrorPrediction = 0.74062500 * 640; 
time = 0.0507s; samplesPerSecond = 12628.5 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.89902954 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.0524s; samplesPerSecond = 12213.5 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.75173340 * 640; EvalErrorPrediction = 0.68125000 * 640; time = 0.0507s; samplesPerSecond = 12625.8 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83969116 * 640; EvalErrorPrediction = 0.71875000 * 640; time = 0.0507s; samplesPerSecond = 12631.5 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.62870483 * 640; EvalErrorPrediction = 0.65468750 * 640; time = 0.0506s; samplesPerSecond = 12649.2 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.66655273 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.0508s; samplesPerSecond = 12590.5 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61327515 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.0511s; samplesPerSecond = 12512.5 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53099976 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0506s; samplesPerSecond = 12649.0 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.43747559 * 640; EvalErrorPrediction = 0.64375000 * 640; time = 0.0505s; samplesPerSecond = 12677.0 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.41107178 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0507s; samplesPerSecond = 12629.5 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.48898926 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0640s; samplesPerSecond = 10005.3 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.34965820 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.1326s; samplesPerSecond = 4825.6 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.23708496 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.0882s; samplesPerSecond = 7257.1 +05/03/2016 18:18:51: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.33135376 * 640; EvalErrorPrediction = 0.62031250 * 640; time = 0.0689s; samplesPerSecond = 9285.2 +05/03/2016 18:18:52: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21607666 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0847s; samplesPerSecond = 7556.9 +05/03/2016 18:18:52: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.29110107 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.0511s; samplesPerSecond = 12531.6 +05/03/2016 18:18:52: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.20535278 * 640; EvalErrorPrediction = 0.57500000 * 640; time = 0.0530s; samplesPerSecond = 12078.0 +05/03/2016 18:18:52: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.01737366 * 20480; EvalErrorPrediction = 0.73061523 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=1.91042s +05/03/2016 18:18:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.dnn.1' -05/03/2016 18:18:25: Starting Epoch 2: learning 
rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +05/03/2016 18:18:52: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:25: Starting minibatch loop. -05/03/2016 18:18:25: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.05711155 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 0.2165s; samplesPerSecond = 11823.2 -05/03/2016 18:18:25: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02925396 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 0.1981s; samplesPerSecond = 12925.8 -05/03/2016 18:18:25: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02826538 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.2459s; samplesPerSecond = 10412.0 -05/03/2016 18:18:26: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.97095795 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.2156s; samplesPerSecond = 11874.4 -05/03/2016 18:18:26: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.94550018 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 0.2603s; samplesPerSecond = 9834.4 -05/03/2016 18:18:26: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.01561737 * 2560; EvalErrorPrediction = 0.54414063 * 2560; time = 0.1865s; samplesPerSecond = 13724.7 -05/03/2016 18:18:27: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94069901 * 2560; EvalErrorPrediction = 0.52500000 * 2560; time = 0.3596s; samplesPerSecond = 7118.5 -05/03/2016 18:18:27: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.94857330 * 2560; EvalErrorPrediction = 0.54023438 * 2560; time = 0.2419s; samplesPerSecond = 10582.7 -05/03/2016 18:18:27: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.99199734 * 20480; EvalErrorPrediction = 0.54179687 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.93019s -05/03/2016 18:18:27: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn.2' +05/03/2016 18:18:52: Starting minibatch loop. 
+05/03/2016 18:18:52: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.05711155 * 2560; EvalErrorPrediction = 0.55000000 * 2560; time = 0.2134s; samplesPerSecond = 11993.9 +05/03/2016 18:18:52: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02925396 * 2560; EvalErrorPrediction = 0.54648438 * 2560; time = 0.1674s; samplesPerSecond = 15290.3 +05/03/2016 18:18:52: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02826538 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.1490s; samplesPerSecond = 17176.6 +05/03/2016 18:18:52: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.97095795 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.1516s; samplesPerSecond = 16891.7 +05/03/2016 18:18:53: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.94550018 * 2560; EvalErrorPrediction = 0.53867188 * 2560; time = 0.1833s; samplesPerSecond = 13968.2 +05/03/2016 18:18:53: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.01561737 * 2560; EvalErrorPrediction = 0.54414063 * 2560; time = 0.1496s; samplesPerSecond = 17107.2 +05/03/2016 18:18:53: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.94069901 * 2560; EvalErrorPrediction = 0.52500000 * 2560; time = 0.1493s; samplesPerSecond = 17145.9 +05/03/2016 18:18:53: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.94857330 * 2560; EvalErrorPrediction = 0.54023438 * 2560; time = 0.1476s; samplesPerSecond = 17341.9 +05/03/2016 18:18:53: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.99199734 * 20480; EvalErrorPrediction = 0.54179687 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.31607s +05/03/2016 18:18:53: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.dnn.2' -05/03/2016 18:18:27: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 18:18:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:27: Starting minibatch loop. -05/03/2016 18:18:28: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946163 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 0.6953s; samplesPerSecond = 14727.0 -05/03/2016 18:18:28: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066799 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 0.5752s; samplesPerSecond = 17801.0 -05/03/2016 18:18:28: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506481 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=1.27598s -05/03/2016 18:18:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn' -05/03/2016 18:18:28: CNTKCommandTrainEnd: speechTrain +05/03/2016 18:18:53: Starting minibatch loop. 
+05/03/2016 18:18:54: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.91946163 * 10240; EvalErrorPrediction = 0.52890625 * 10240; time = 0.5611s; samplesPerSecond = 18249.9 +05/03/2016 18:18:54: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91066799 * 10240; EvalErrorPrediction = 0.52783203 * 10240; time = 0.6789s; samplesPerSecond = 15082.4 +05/03/2016 18:18:54: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.91506481 * 20480; EvalErrorPrediction = 0.52836914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=1.24565s +05/03/2016 18:18:54: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.dnn' +05/03/2016 18:18:54: CNTKCommandTrainEnd: speechTrain -05/03/2016 18:18:28: Action "train" complete. +05/03/2016 18:18:54: Action "train" complete. -05/03/2016 18:18:28: ############################################################################## -05/03/2016 18:18:28: # # -05/03/2016 18:18:28: # Action "SVD" # -05/03/2016 18:18:28: # # -05/03/2016 18:18:28: ############################################################################## +05/03/2016 18:18:54: ############################################################################## +05/03/2016 18:18:54: # # +05/03/2016 18:18:54: # Action "SVD" # +05/03/2016 18:18:54: # # +05/03/2016 18:18:54: ############################################################################## Post-processing network... @@ -564,8 +564,8 @@ Post-processing network complete. -------------------------------------------------------------------------------------------- ParameterSVD: start to process group 0 with KeepRatio=0.50 -------------------------------------------------------------------------------------------- -Performing SVD for a 512-by-363 matrix (node name: W0 ) --- computation time 0.14 secs ; keep 50.0% energy ===> keep 104 svd values (reduce to 49.0% parameters) -Performing SVD for a 512-by-512 matrix (node name: W1 ) --- computation time 0.27 secs ; keep 50.0% energy ===> keep 128 svd values (reduce to 50.0% parameters) +Performing SVD for a 512-by-363 matrix (node name: W0 ) --- computation time 0.16 secs ; keep 50.0% energy ===> keep 104 svd values (reduce to 49.0% parameters) +Performing SVD for a 512-by-512 matrix (node name: W1 ) --- computation time 0.18 secs ; keep 50.0% energy ===> keep 128 svd values (reduce to 50.0% parameters) Performing SVD for a 132-by-512 matrix (node name: W2 ) --- computation time 0.04 secs ; keep 50.0% energy ===> keep 32 svd values (reduce to 30.5% parameters) Post-processing network... @@ -625,26 +625,25 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:18:29: Action "SVD" complete. +05/03/2016 18:18:55: Action "SVD" complete. 
-05/03/2016 18:18:29: ############################################################################## -05/03/2016 18:18:29: # # -05/03/2016 18:18:29: # Action "train" # -05/03/2016 18:18:29: # # -05/03/2016 18:18:29: ############################################################################## +05/03/2016 18:18:55: ############################################################################## +05/03/2016 18:18:55: # # +05/03/2016 18:18:55: # Action "train" # +05/03/2016 18:18:55: # # +05/03/2016 18:18:55: ############################################################################## -05/03/2016 18:18:29: CNTKCommandTrainBegin: SVDTrain +05/03/2016 18:18:55: CNTKCommandTrainBegin: SVDTrain NDLBuilder Using CPU -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:18:29: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn.0'. +05/03/2016 18:18:55: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn.0'. Post-processing network... @@ -702,14 +701,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:18:29: Loaded model with 31 nodes on CPU. +05/03/2016 18:18:55: Loaded model with 31 nodes on CPU. -05/03/2016 18:18:29: Training criterion node(s): -05/03/2016 18:18:29: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 18:18:55: Training criterion node(s): +05/03/2016 18:18:55: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 18:18:29: Evaluation criterion node(s): +05/03/2016 18:18:55: Evaluation criterion node(s): -05/03/2016 18:18:29: EvalErrorPrediction = ErrorPrediction +05/03/2016 18:18:55: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -717,60 +716,63 @@ Allocating matrices for forward and/or backward propagation. 
Memory Sharing Structure: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *3]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *3]] [PosteriorProb Value[132 x 1 x *3]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *3]] [features Gradient[363 x *3]] [labels Gradient[132 x *3]] } -0x7fdd3a007748: {[W1-V Value[128 x 512]] } -0x7fdd3a0160e8: {[W0-V Value[104 x 363]] } -0x7fdd3a01c308: {[B0 Value[512 x 1]] } -0x7fdd3a027fc8: {[LogOfPrior Value[132]] } -0x7fdd3a0284c8: {[B1 Value[512 x 1]] } -0x7fdd3a04d028: {[labels Value[132 x *3]] } -0x7fdd3a04e268: {[CrossEntropyWithSoftmax Gradient[1]] } -0x7fdd3a04e428: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *3]] [HLast Gradient[132 x 1 x *3]] } -0x7fdd3a04e5e8: {[W2*H1 Gradient[132 x 1 x *3]] } -0x7fdd3a04e7a8: {[B2 Gradient[132 x 1]] } -0x7fdd3a05e8b8: {[W2-V Value[32 x 512]] } -0x7fdd3a0a2118: {[Prior Value[132]] } -0x7fdd3a0c8368: {[InvStdOfFeatures Value[363]] } -0x7fdd3a0c86a8: {[W2-SVD Value[132 x 512]] } -0x7fdd3a0c8b28: {[B2 Value[132 x 1]] } -0x7fdd3a0c90e8: {[MVNormalizedFeatures Value[363 x *3]] } -0x7fdd3a0dba98: {[W0*features+B0 Value[512 x 1 x *3]] [W0-SVD Gradient[512 x 363]] [W1-U Gradient[512 x 128]] } -0x7fdd3a0dbb98: {[H1 Value[512 x 1 x *3]] [W0*features Gradient[512 x *3]] [W0-V Gradient[104 x 363]] } -0x7fdd3a0f8b28: {[W1-U Value[512 x 128]] } -0x7fdd3a0fe2f8: {[W1-SVD Value[512 x 512]] } -0x7fdd3a0fe578: {[W0-SVD Value[512 x 363]] } -0x7fdd3a0fe738: {[W0*features Value[512 x *3]] [W0-U Gradient[512 x 104]] } -0x7fdd3d813c08: {[ScaledLogLikelihood Value[132 x 1 x *3]] } -0x7fdd3d817ec8: {[W0*features+B0 Gradient[512 x 1 x *3]] [W1*H1 Value[512 x 1 x *3]] [W1-V Gradient[128 x 512]] } -0x7fdd3d818088: {[W1*H1+B1 Value[512 x 1 x *3]] [W1-SVD Gradient[512 x 512]] [W2-U Gradient[132 x 32]] } -0x7fdd3d818248: {[H2 Value[512 x 1 x *3]] [W1*H1 Gradient[512 x 1 x *3]] [W2-V Gradient[32 x 512]] } -0x7fdd3d819c58: {[MeanOfFeatures Value[363]] } -0x7fdd3d81f238: {[features Value[363 x *3]] } -0x7fdd3d821998: {[CrossEntropyWithSoftmax Value[1]] } -0x7fdd3d821db8: {[W2-U Value[132 x 32]] } -0x7fdd3d838b68: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *3]] [W1*H1+B1 Gradient[512 x 1 x *3]] [W2*H1 Value[132 x 1 x *3]] } -0x7fdd3d838d28: {[HLast Value[132 x 1 x *3]] [W2-SVD Gradient[132 x 512]] } -0x7fdd3d83c418: {[EvalErrorPrediction Value[1]] } -0x7fdd3d8419b8: {[W0-U Value[512 x 104]] } +0x7f79d5a063d8: {[B0 Value[512 x 1]] } +0x7f79d5a07108: {[InvStdOfFeatures Value[363]] } +0x7f79d5a09d38: {[labels Value[132 x *3]] } +0x7f79d5a523f8: {[Prior Value[132]] } +0x7f79d5a5d5f8: {[ScaledLogLikelihood Value[132 x 1 x *3]] } +0x7f79d5a87608: {[CrossEntropyWithSoftmax Value[1]] } +0x7f79d5aaa778: {[W0-V Value[104 x 363]] } +0x7f79d5aaf0d8: {[B2 Value[132 x 1]] } +0x7f79d5ad4f58: {[MVNormalizedFeatures Value[363 x *3]] } +0x7f79d5ad5018: {[W0-SVD Value[512 x 363]] } +0x7f79d5ad5658: {[LogOfPrior Value[132]] } +0x7f79d5ad6f38: {[features Value[363 x *3]] } +0x7f79d5adc428: {[W2-SVD Value[132 x 512]] } +0x7f79d5adc6a8: {[W1-SVD Value[512 x 512]] } +0x7f79d5ae9278: {[MeanOfFeatures Value[363]] } +0x7f79d5af5698: {[W1-V Value[128 x 512]] } +0x7f79d5af6938: {[W1*H1+B1 Value[512 x 1 x *3]] [W1-SVD Gradient[512 x 512]] [W2-U Gradient[132 x 32]] } +0x7f79d5af6af8: {[H2 Value[512 x 1 x *3]] [W1*H1 Gradient[512 x 1 x *3]] [W2-V Gradient[32 x 512]] } +0x7f79d5af6cb8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *3]] [W1*H1+B1 
Gradient[512 x 1 x *3]] [W2*H1 Value[132 x 1 x *3]] } +0x7f79d5af6e78: {[HLast Value[132 x 1 x *3]] [W2-SVD Gradient[132 x 512]] } +0x7f79d5afbc38: {[EvalErrorPrediction Value[1]] } +0x7f79d9404248: {[W2-V Value[32 x 512]] } +0x7f79d941d8f8: {[CrossEntropyWithSoftmax Gradient[1]] } +0x7f79d941dab8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *3]] [HLast Gradient[132 x 1 x *3]] } +0x7f79d941dc78: {[W2*H1 Gradient[132 x 1 x *3]] } +0x7f79d9424cb8: {[W0-U Value[512 x 104]] } +0x7f79d9424f58: {[W0*features Value[512 x *3]] [W0-U Gradient[512 x 104]] } +0x7f79d9425058: {[W0*features+B0 Value[512 x 1 x *3]] [W0-SVD Gradient[512 x 363]] [W1-U Gradient[512 x 128]] } +0x7f79d9425488: {[H1 Value[512 x 1 x *3]] [W0*features Gradient[512 x *3]] [W0-V Gradient[104 x 363]] } +0x7f79d9425648: {[W0*features+B0 Gradient[512 x 1 x *3]] [W1*H1 Value[512 x 1 x *3]] [W1-V Gradient[128 x 512]] } +0x7f79d9425a48: {[B1 Value[512 x 1]] } +0x7f79d9426668: {[W2-U Value[132 x 32]] } +0x7f79d9427958: {[B2 Gradient[132 x 1]] } +0x7f79d942a1d8: {[W1-U Value[512 x 128]] } -05/03/2016 18:18:29: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 18:18:55: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 18:18:29: Starting Epoch 1: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 18:18:55: Starting Epoch 1: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:18:29: Starting minibatch loop. -05/03/2016 18:18:30: Epoch[ 1 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90713367 * 10240; EvalErrorPrediction = 0.52988281 * 10240; time = 0.9713s; samplesPerSecond = 10543.1 -05/03/2016 18:18:31: Epoch[ 1 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86631451 * 10240; EvalErrorPrediction = 0.51660156 * 10240; time = 0.6795s; samplesPerSecond = 15070.5 -05/03/2016 18:18:31: Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.88672409 * 20480; EvalErrorPrediction = 0.52324219 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 9.7656251e-05; epochTime=1.71465s -05/03/2016 18:18:31: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn.1' +05/03/2016 18:18:55: Starting minibatch loop. 
+05/03/2016 18:18:56: Epoch[ 1 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90713367 * 10240; EvalErrorPrediction = 0.52988281 * 10240; time = 0.6371s; samplesPerSecond = 16073.5 +05/03/2016 18:18:56: Epoch[ 1 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86631451 * 10240; EvalErrorPrediction = 0.51660156 * 10240; time = 0.5951s; samplesPerSecond = 17205.8 +05/03/2016 18:18:56: Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.88672409 * 20480; EvalErrorPrediction = 0.52324219 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 9.7656251e-05; epochTime=1.34774s +05/03/2016 18:18:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn.1' -05/03/2016 18:18:31: Starting Epoch 2: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 18:18:56: Starting Epoch 2: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:31: Starting minibatch loop. -05/03/2016 18:18:32: Epoch[ 2 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.84094143 * 10240; EvalErrorPrediction = 0.51132813 * 10240; time = 0.9067s; samplesPerSecond = 11293.8 -05/03/2016 18:18:33: Epoch[ 2 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.85906620 * 10240; EvalErrorPrediction = 0.51396484 * 10240; time = 1.1135s; samplesPerSecond = 9196.4 -05/03/2016 18:18:33: Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.85000381 * 20480; EvalErrorPrediction = 0.51264648 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 9.7656251e-05; epochTime=2.02363s -05/03/2016 18:18:33: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn' -05/03/2016 18:18:33: CNTKCommandTrainEnd: SVDTrain +05/03/2016 18:18:56: Starting minibatch loop. +05/03/2016 18:18:57: Epoch[ 2 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.84094143 * 10240; EvalErrorPrediction = 0.51132813 * 10240; time = 0.6506s; samplesPerSecond = 15739.4 +05/03/2016 18:18:58: Epoch[ 2 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.85906620 * 10240; EvalErrorPrediction = 0.51396484 * 10240; time = 0.6072s; samplesPerSecond = 16863.3 +05/03/2016 18:18:58: Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.85000381 * 20480; EvalErrorPrediction = 0.51264648 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 9.7656251e-05; epochTime=1.26397s +05/03/2016 18:18:58: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn' +05/03/2016 18:18:58: CNTKCommandTrainEnd: SVDTrain -05/03/2016 18:18:33: Action "train" complete. +05/03/2016 18:18:58: Action "train" complete. 
-05/03/2016 18:18:33: __COMPLETED__ \ No newline at end of file +05/03/2016 18:18:58: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.gpu.txt index bf2624157..521fcf23f 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.gpu.txt @@ -1,4 +1,4 @@ -=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu DeviceId=0 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu DeviceId=0 timestamping=true ------------------------------------------------------------------- Build info: @@ -17,32 +17,32 @@ Build info: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux ------------------------------------------------------------------- Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -05/03/2016 18:18:33: ------------------------------------------------------------------- -05/03/2016 18:18:33: Build info: +05/03/2016 18:18:58: ------------------------------------------------------------------- +05/03/2016 18:18:58: Build info: -05/03/2016 18:18:33: Built time: May 3 2016 17:56:15 -05/03/2016 18:18:33: Last modified date: Tue May 3 11:36:22 2016 -05/03/2016 18:18:33: Build type: release -05/03/2016 18:18:33: Build target: GPU -05/03/2016 18:18:33: With 1bit-SGD: no -05/03/2016 18:18:33: Math lib: acml -05/03/2016 18:18:33: CUDA_PATH: /usr/local/cuda-7.5 -05/03/2016 18:18:33: CUB_PATH: /usr/local/cub-1.4.1 -05/03/2016 18:18:33: CUDNN_PATH: /usr/local/cudnn-4.0 -05/03/2016 18:18:33: Build Branch: HEAD -05/03/2016 18:18:33: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 -05/03/2016 18:18:33: Built by philly on 18750d26eb32 -05/03/2016 18:18:33: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux -05/03/2016 18:18:33: ------------------------------------------------------------------- +05/03/2016 18:18:58: Built time: May 3 2016 17:56:15 +05/03/2016 18:18:58: Last modified date: Tue May 3 11:36:22 2016 +05/03/2016 18:18:58: Build type: release +05/03/2016 18:18:58: Build target: GPU 
+05/03/2016 18:18:58: With 1bit-SGD: no +05/03/2016 18:18:58: Math lib: acml +05/03/2016 18:18:58: CUDA_PATH: /usr/local/cuda-7.5 +05/03/2016 18:18:58: CUB_PATH: /usr/local/cub-1.4.1 +05/03/2016 18:18:58: CUDNN_PATH: /usr/local/cudnn-4.0 +05/03/2016 18:18:58: Build Branch: HEAD +05/03/2016 18:18:58: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +05/03/2016 18:18:58: Built by philly on 18750d26eb32 +05/03/2016 18:18:58: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +05/03/2016 18:18:58: ------------------------------------------------------------------- -05/03/2016 18:18:33: Running on localhost at 2016/05/03 18:18:33 -05/03/2016 18:18:33: Command line: -/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu DeviceId=0 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +05/03/2016 18:18:58: Running on localhost at 2016/05/03 18:18:58 +05/03/2016 18:18:58: Command line: +/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD/cntk.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data RunDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu DeviceId=0 timestamping=true -05/03/2016 18:18:33: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:33: precision=float +05/03/2016 18:18:58: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:58: precision=float command=speechTrain:modelDecomposition:SVDTrain deviceId=$DeviceId$ speechTrain=[ @@ -120,25 +120,23 @@ SVDTrain=[ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu DeviceId=0 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 18:18:33: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< 
+05/03/2016 18:18:58: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:33: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 18:18:33: precision=float +05/03/2016 18:18:58: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:58: precision=float command=speechTrain:modelDecomposition:SVDTrain deviceId=0 speechTrain=[ action=train makeMode=false - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.dnn deviceId=0 traceLevel=1 SimpleNetworkBuilder=[ @@ -183,19 +181,19 @@ reader=[ ] modelDecomposition=[ action=SVD - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn - outputmodelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn.0 + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.dnn + outputmodelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn.0 KeepRatio=0.5 NodeNameRegex=W.* ] SVDTrain=[ action=train makeMode=true - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn deviceId=0 traceLevel=1 NDLNetworkBuilder=[ - NetworkDescription=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/nonExistent.ndl + NetworkDescription=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/nonExistent.ndl ] SGD=[ epochSize=20480 @@ -210,32 +208,30 @@ SVDTrain=[ ] ] currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu +RunDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data -ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD -OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD +OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu DeviceId=0 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 18:18:33: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:58: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:33: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:18:58: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain:modelDecomposition:SVDTrain -configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/../../SVD +configparameters: cntk.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/SVD configparameters: 
cntk.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data configparameters: cntk.cntk:deviceId=0 configparameters: cntk.cntk:modelDecomposition=[ action=SVD - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn - outputmodelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn.0 + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.dnn + outputmodelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn.0 KeepRatio=0.5 NodeNameRegex=W.* ] -configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu +configparameters: cntk.cntk:OutputDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu configparameters: cntk.cntk:precision=float configparameters: cntk.cntk:reader=[ readerType=HTKMLFReader @@ -254,13 +250,13 @@ configparameters: cntk.cntk:reader=[ labelDim=132 labelType=Category ] -] [readerType=ExperimentalHTKMLFReader] [prefetch=true] +] -configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu +configparameters: cntk.cntk:RunDir=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu configparameters: cntk.cntk:speechTrain=[ action=train makeMode=false - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.dnn deviceId=0 traceLevel=1 SimpleNetworkBuilder=[ @@ -289,11 +285,11 @@ configparameters: cntk.cntk:speechTrain=[ configparameters: cntk.cntk:SVDTrain=[ action=train makeMode=true - modelPath=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn + modelPath=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn deviceId=0 traceLevel=1 NDLNetworkBuilder=[ - NetworkDescription=/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/nonExistent.ndl + NetworkDescription=/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/nonExistent.ndl ] SGD=[ epochSize=20480 @@ -309,32 +305,31 @@ configparameters: cntk.cntk:SVDTrain=[ ] configparameters: cntk.cntk:timestamping=true -05/03/2016 18:18:33: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 18:18:33: Commands: speechTrain modelDecomposition SVDTrain -05/03/2016 18:18:33: Precision = "float" -05/03/2016 18:18:33: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn -05/03/2016 18:18:33: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 18:18:33: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn -05/03/2016 18:18:33: CNTKCommandTrainInfo: SVDTrain : 2 -05/03/2016 18:18:33: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 5 +05/03/2016 18:18:58: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:18:58: Commands: speechTrain modelDecomposition SVDTrain +05/03/2016 18:18:58: Precision = "float" +05/03/2016 18:18:58: CNTKModelPath: 
/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.dnn +05/03/2016 18:18:58: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 18:18:58: CNTKModelPath: /tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn +05/03/2016 18:18:58: CNTKCommandTrainInfo: SVDTrain : 2 +05/03/2016 18:18:58: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 5 -05/03/2016 18:18:33: ############################################################################## -05/03/2016 18:18:33: # # -05/03/2016 18:18:33: # Action "train" # -05/03/2016 18:18:33: # # -05/03/2016 18:18:33: ############################################################################## +05/03/2016 18:18:58: ############################################################################## +05/03/2016 18:18:58: # # +05/03/2016 18:18:58: # Action "train" # +05/03/2016 18:18:58: # # +05/03/2016 18:18:58: ############################################################################## -05/03/2016 18:18:33: CNTKCommandTrainBegin: speechTrain +05/03/2016 18:18:58: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using GPU 0 -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:18:33: Creating virgin network. +05/03/2016 18:18:58: Creating virgin network. SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 Post-processing network... @@ -387,14 +382,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:18:33: Created model with 25 nodes on GPU 0. +05/03/2016 18:18:58: Created model with 25 nodes on GPU 0. -05/03/2016 18:18:33: Training criterion node(s): -05/03/2016 18:18:33: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 18:18:58: Training criterion node(s): +05/03/2016 18:18:58: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 18:18:33: Evaluation criterion node(s): +05/03/2016 18:18:58: Evaluation criterion node(s): -05/03/2016 18:18:33: EvalErrorPrediction = ErrorPrediction +05/03/2016 18:18:58: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -402,114 +397,119 @@ Allocating matrices for forward and/or backward propagation. 
Memory Sharing Structure: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -0x2cd0238: {[features Value[363 x *]] } -0x35460e8: {[MeanOfFeatures Value[363]] } -0x35465f8: {[InvStdOfFeatures Value[363]] } -0x3546de8: {[W0 Value[512 x 363]] } -0x3aaa188: {[W1 Value[512 x 512]] } -0x3aaaf58: {[B1 Value[512 x 1]] } -0x3aac0f8: {[W2 Value[132 x 512]] } -0x3aac878: {[B2 Value[132 x 1]] } -0x3aadbd8: {[labels Value[132 x *]] } -0x3aaee38: {[Prior Value[132]] } -0x3ab46d8: {[EvalErrorPrediction Value[1]] } -0x3ab49d8: {[ScaledLogLikelihood Value[132 x 1 x *]] } -0x3ab4b98: {[CrossEntropyWithSoftmax Value[1]] } -0x3ab5028: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -0x3ab5198: {[LogOfPrior Value[132]] } -0x3aba7c8: {[B0 Value[512 x 1]] } -0x3d95e68: {[MVNormalizedFeatures Value[363 x *]] } -0x3d96628: {[W0*features Value[512 x *]] } -0x3d96838: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -0x3d96998: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -0x3d96af8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -0x3d96cb8: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -0x3d96e78: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -0x3d97038: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -0x3d97b98: {[CrossEntropyWithSoftmax Gradient[1]] } -0x3d97d58: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -0x3d97f18: {[W2*H1 Gradient[132 x 1 x *]] } -0x3d980d8: {[B2 Gradient[132 x 1]] } +0x1846848: {[B0 Value[512 x 1]] } +0x18537e8: {[features Value[363 x *]] } +0x24ae688: {[InvStdOfFeatures Value[363]] } +0x24af3b8: {[W0 Value[512 x 363]] } +0x24b31c8: {[MeanOfFeatures Value[363]] } +0x2502cc8: {[W1 Value[512 x 512]] } +0x28b72a8: {[B1 Value[512 x 1]] } +0x28b8428: {[W2 Value[132 x 512]] } +0x28b8ba8: {[B2 Value[132 x 1]] } +0x28b9f08: {[labels Value[132 x *]] } +0x28bb168: {[Prior Value[132]] } +0x28c0b98: {[EvalErrorPrediction Value[1]] } +0x28c0cf8: {[ScaledLogLikelihood Value[132 x 1 x *]] } +0x28c0eb8: {[CrossEntropyWithSoftmax Value[1]] } +0x28c1348: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +0x28c1478: {[LogOfPrior Value[132]] } +0x28c2bd8: {[MVNormalizedFeatures Value[363 x *]] } +0x28c3398: {[W0*features Value[512 x *]] } +0x28c35a8: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +0x28c3708: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +0x28c38c8: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +0x28c3a88: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +0x28c3c48: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +0x28c3e08: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +0x28c4968: {[CrossEntropyWithSoftmax Gradient[1]] } +0x28c4b28: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +0x28c4ce8: {[W2*H1 Gradient[132 x 1 x *]] } +0x28c4ea8: {[B2 Gradient[132 x 1]] } -05/03/2016 18:18:33: Precomputing --> 3 PreCompute nodes found. +05/03/2016 18:18:58: Precomputing --> 3 PreCompute nodes found. 
-05/03/2016 18:18:33: MeanOfFeatures = Mean() -05/03/2016 18:18:33: InvStdOfFeatures = InvStdDev() -05/03/2016 18:18:33: Prior = Mean() +05/03/2016 18:18:58: MeanOfFeatures = Mean() +05/03/2016 18:18:58: InvStdOfFeatures = InvStdDev() +05/03/2016 18:18:58: Prior = Mean() +minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:18:35: Precomputing --> Completed. +05/03/2016 18:18:59: Precomputing --> Completed. -05/03/2016 18:18:35: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +05/03/2016 18:18:59: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:35: Starting minibatch loop. -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.32135277 * 640; EvalErrorPrediction = 0.90000000 * 640; time = 0.0919s; samplesPerSecond = 6966.6 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15070992 * 640; EvalErrorPrediction = 0.86718750 * 640; time = 0.0089s; samplesPerSecond = 72007.2 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99901123 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0089s; samplesPerSecond = 71644.5 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86945953 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0091s; samplesPerSecond = 70648.0 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80219574 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0088s; samplesPerSecond = 72513.0 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72890930 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.0088s; samplesPerSecond = 72776.9 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.56186981 * 640; EvalErrorPrediction = 0.82343750 * 640; time = 0.0088s; samplesPerSecond = 72562.4 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42790527 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0090s; samplesPerSecond = 71364.9 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.33928528 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.0090s; samplesPerSecond = 71221.9 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36398926 * 640; EvalErrorPrediction = 0.84375000 * 640; time = 0.0089s; samplesPerSecond = 71934.4 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21223450 * 640; EvalErrorPrediction = 0.75312500 * 640; time = 0.0089s; samplesPerSecond = 71845.5 -05/03/2016 18:18:35: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31265564 * 640; EvalErrorPrediction = 0.78750000 * 640; time = 0.0090s; samplesPerSecond = 70812.1 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14082031 * 640; EvalErrorPrediction = 0.74687500 * 640; time = 0.0088s; samplesPerSecond = 72694.2 -05/03/2016 18:18:36: 
Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.00689697 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.0090s; samplesPerSecond = 70741.7 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.00495911 * 640; EvalErrorPrediction = 0.72343750 * 640; time = 0.0090s; samplesPerSecond = 70851.3 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.97858887 * 640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0088s; samplesPerSecond = 72381.8 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.85686035 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.0088s; samplesPerSecond = 72373.6 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.69053345 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.0095s; samplesPerSecond = 67703.4 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.78653564 * 640; EvalErrorPrediction = 0.70468750 * 640; time = 0.0082s; samplesPerSecond = 78230.0 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.57702026 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0083s; samplesPerSecond = 77500.6 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.61571655 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0082s; samplesPerSecond = 78029.7 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.55236206 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0082s; samplesPerSecond = 78230.0 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48211670 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.0082s; samplesPerSecond = 78067.8 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.38778687 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0083s; samplesPerSecond = 77500.6 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.36900635 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0082s; samplesPerSecond = 77821.0 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.43967285 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0082s; samplesPerSecond = 77764.3 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.30281982 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.0082s; samplesPerSecond = 77632.2 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.19668579 * 640; EvalErrorPrediction = 0.55937500 * 640; time = 0.0083s; samplesPerSecond = 77491.2 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.28980103 * 640; EvalErrorPrediction = 0.60468750 * 640; time = 0.0082s; samplesPerSecond = 77792.6 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.17750854 * 640; EvalErrorPrediction = 0.62187500 * 640; time = 0.0082s; samplesPerSecond = 77773.7 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.26263428 * 640; EvalErrorPrediction = 0.59687500 * 640; time = 0.0082s; samplesPerSecond = 77651.1 -05/03/2016 18:18:36: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: 
CrossEntropyWithSoftmax = 2.15072632 * 640; EvalErrorPrediction = 0.56250000 * 640; time = 0.0082s; samplesPerSecond = 78105.9 -05/03/2016 18:18:36: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.97995720 * 20480; EvalErrorPrediction = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=0.400032s -05/03/2016 18:18:36: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn.1' +05/03/2016 18:18:59: Starting minibatch loop. +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.32135277 * 640; EvalErrorPrediction = 0.90000000 * 640; time = 0.0102s; samplesPerSecond = 62924.0 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.15070992 * 640; EvalErrorPrediction = 0.86718750 * 640; time = 0.0075s; samplesPerSecond = 84813.1 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.99901123 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0074s; samplesPerSecond = 85975.3 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.86945953 * 640; EvalErrorPrediction = 0.87656250 * 640; time = 0.0075s; samplesPerSecond = 85790.9 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.62%]: CrossEntropyWithSoftmax = 3.80219574 * 640; EvalErrorPrediction = 0.87812500 * 640; time = 0.0074s; samplesPerSecond = 86148.9 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72890930 * 640; EvalErrorPrediction = 0.86875000 * 640; time = 0.0075s; samplesPerSecond = 85630.2 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.56186981 * 640; EvalErrorPrediction = 0.82343750 * 640; time = 0.0075s; samplesPerSecond = 85676.0 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.42790527 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0074s; samplesPerSecond = 86183.7 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.12%]: CrossEntropyWithSoftmax = 3.33928528 * 640; EvalErrorPrediction = 0.77343750 * 640; time = 0.0075s; samplesPerSecond = 85710.5 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36398926 * 640; EvalErrorPrediction = 0.84375000 * 640; time = 0.0075s; samplesPerSecond = 85906.0 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.21223450 * 640; EvalErrorPrediction = 0.75312500 * 640; time = 0.0075s; samplesPerSecond = 85630.2 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.31265564 * 640; EvalErrorPrediction = 0.78750000 * 640; time = 0.0075s; samplesPerSecond = 85390.3 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.62%]: CrossEntropyWithSoftmax = 3.14082031 * 640; EvalErrorPrediction = 0.74687500 * 640; time = 0.0075s; samplesPerSecond = 85756.4 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.00689697 * 640; EvalErrorPrediction = 0.69687500 * 640; time = 0.0074s; samplesPerSecond = 86148.9 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.00495911 * 640; EvalErrorPrediction = 0.72343750 * 640; time = 0.0074s; samplesPerSecond = 86288.3 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.97858887 * 
640; EvalErrorPrediction = 0.73906250 * 640; time = 0.0074s; samplesPerSecond = 86160.5 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.12%]: CrossEntropyWithSoftmax = 2.85686035 * 640; EvalErrorPrediction = 0.70781250 * 640; time = 0.0075s; samplesPerSecond = 85906.0 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.69053345 * 640; EvalErrorPrediction = 0.67187500 * 640; time = 0.0075s; samplesPerSecond = 85894.5 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.78653564 * 640; EvalErrorPrediction = 0.70468750 * 640; time = 0.0075s; samplesPerSecond = 85871.5 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.57702026 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0074s; samplesPerSecond = 86044.6 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.62%]: CrossEntropyWithSoftmax = 2.61571655 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0075s; samplesPerSecond = 85584.4 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.55236206 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0075s; samplesPerSecond = 85664.6 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48211670 * 640; EvalErrorPrediction = 0.62500000 * 640; time = 0.0075s; samplesPerSecond = 85527.2 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.38778687 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0075s; samplesPerSecond = 85538.6 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.12%]: CrossEntropyWithSoftmax = 2.36900635 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0075s; samplesPerSecond = 85435.9 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.43967285 * 640; EvalErrorPrediction = 0.63281250 * 640; time = 0.0075s; samplesPerSecond = 85231.1 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.30281982 * 640; EvalErrorPrediction = 0.61250000 * 640; time = 0.0075s; samplesPerSecond = 84948.2 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.19668579 * 640; EvalErrorPrediction = 0.55937500 * 640; time = 0.0075s; samplesPerSecond = 85550.1 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.62%]: CrossEntropyWithSoftmax = 2.28980103 * 640; EvalErrorPrediction = 0.60468750 * 640; time = 0.0075s; samplesPerSecond = 85208.4 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.17750854 * 640; EvalErrorPrediction = 0.62187500 * 640; time = 0.0075s; samplesPerSecond = 85163.0 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.26263428 * 640; EvalErrorPrediction = 0.59687500 * 640; time = 0.0075s; samplesPerSecond = 85310.6 +05/03/2016 18:18:59: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.15072632 * 640; EvalErrorPrediction = 0.56250000 * 640; time = 0.0075s; samplesPerSecond = 85140.3 +05/03/2016 18:18:59: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.97995720 * 20480; EvalErrorPrediction = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=0.244611s +05/03/2016 18:18:59: SGD: Saving checkpoint model 
'/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.dnn.1' -05/03/2016 18:18:36: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +05/03/2016 18:18:59: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:36: Starting minibatch loop. -05/03/2016 18:18:36: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.01598530 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0180s; samplesPerSecond = 141985.6 -05/03/2016 18:18:36: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98818569 * 2560; EvalErrorPrediction = 0.54296875 * 2560; time = 0.0150s; samplesPerSecond = 171008.7 -05/03/2016 18:18:36: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98698120 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0150s; samplesPerSecond = 170723.6 -05/03/2016 18:18:36: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93126144 * 2560; EvalErrorPrediction = 0.52773437 * 2560; time = 0.0148s; samplesPerSecond = 172646.3 -05/03/2016 18:18:36: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90067825 * 2560; EvalErrorPrediction = 0.52656250 * 2560; time = 0.0147s; samplesPerSecond = 173771.4 -05/03/2016 18:18:36: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97115860 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0148s; samplesPerSecond = 173089.9 -05/03/2016 18:18:36: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89518127 * 2560; EvalErrorPrediction = 0.52031250 * 2560; time = 0.0145s; samplesPerSecond = 176393.6 -05/03/2016 18:18:36: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.90450439 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0146s; samplesPerSecond = 175258.4 -05/03/2016 18:18:36: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.94924202 * 20480; EvalErrorPrediction = 0.53417969 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.123232s -05/03/2016 18:18:36: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn.2' +05/03/2016 18:18:59: Starting minibatch loop. 
+05/03/2016 18:18:59: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.01598530 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0144s; samplesPerSecond = 177617.4 +05/03/2016 18:18:59: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98818569 * 2560; EvalErrorPrediction = 0.54296875 * 2560; time = 0.0121s; samplesPerSecond = 212131.3 +05/03/2016 18:18:59: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98698120 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0120s; samplesPerSecond = 212695.2 +05/03/2016 18:18:59: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.93126144 * 2560; EvalErrorPrediction = 0.52773437 * 2560; time = 0.0120s; samplesPerSecond = 212748.3 +05/03/2016 18:18:59: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90067825 * 2560; EvalErrorPrediction = 0.52656250 * 2560; time = 0.0120s; samplesPerSecond = 213138.0 +05/03/2016 18:18:59: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97115860 * 2560; EvalErrorPrediction = 0.54140625 * 2560; time = 0.0120s; samplesPerSecond = 212642.2 +05/03/2016 18:18:59: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89518127 * 2560; EvalErrorPrediction = 0.52031250 * 2560; time = 0.0120s; samplesPerSecond = 214064.7 +05/03/2016 18:18:59: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.90450439 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0119s; samplesPerSecond = 214477.2 +05/03/2016 18:18:59: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.94924202 * 20480; EvalErrorPrediction = 0.53417969 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.100566s +05/03/2016 18:18:59: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.dnn.2' -05/03/2016 18:18:36: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 18:18:59: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:36: Starting minibatch loop. -05/03/2016 18:18:36: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359848 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.0485s; samplesPerSecond = 211264.7 -05/03/2016 18:18:36: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656265 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.0426s; samplesPerSecond = 240279.7 -05/03/2016 18:18:36: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008057 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.093474s -05/03/2016 18:18:36: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn' -05/03/2016 18:18:36: CNTKCommandTrainEnd: speechTrain +05/03/2016 18:18:59: Starting minibatch loop. 
+05/03/2016 18:19:00: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87359848 * 10240; EvalErrorPrediction = 0.51933594 * 10240; time = 0.0356s; samplesPerSecond = 287777.9 +05/03/2016 18:19:00: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.86656265 * 10240; EvalErrorPrediction = 0.51748047 * 10240; time = 0.0300s; samplesPerSecond = 341492.7 +05/03/2016 18:19:00: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87008057 * 20480; EvalErrorPrediction = 0.51840820 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.070526s +05/03/2016 18:19:00: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.dnn' +05/03/2016 18:19:00: CNTKCommandTrainEnd: speechTrain -05/03/2016 18:18:36: Action "train" complete. +05/03/2016 18:19:00: Action "train" complete. -05/03/2016 18:18:36: ############################################################################## -05/03/2016 18:18:36: # # -05/03/2016 18:18:36: # Action "SVD" # -05/03/2016 18:18:36: # # -05/03/2016 18:18:36: ############################################################################## +05/03/2016 18:19:00: ############################################################################## +05/03/2016 18:19:00: # # +05/03/2016 18:19:00: # Action "SVD" # +05/03/2016 18:19:00: # # +05/03/2016 18:19:00: ############################################################################## Post-processing network... @@ -565,8 +565,8 @@ Post-processing network complete. -------------------------------------------------------------------------------------------- ParameterSVD: start to process group 0 with KeepRatio=0.50 -------------------------------------------------------------------------------------------- -Performing SVD for a 512-by-363 matrix (node name: W0 ) --- computation time 0.34 secs ; keep 50.0% energy ===> keep 104 svd values (reduce to 49.0% parameters) -Performing SVD for a 512-by-512 matrix (node name: W1 ) --- computation time 0.21 secs ; keep 50.0% energy ===> keep 128 svd values (reduce to 50.0% parameters) +Performing SVD for a 512-by-363 matrix (node name: W0 ) --- computation time 0.31 secs ; keep 50.0% energy ===> keep 104 svd values (reduce to 49.0% parameters) +Performing SVD for a 512-by-512 matrix (node name: W1 ) --- computation time 0.20 secs ; keep 50.0% energy ===> keep 128 svd values (reduce to 50.0% parameters) Performing SVD for a 132-by-512 matrix (node name: W2 ) --- computation time 0.04 secs ; keep 50.0% energy ===> keep 32 svd values (reduce to 30.5% parameters) Post-processing network... @@ -626,26 +626,25 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:18:37: Action "SVD" complete. +05/03/2016 18:19:00: Action "SVD" complete. 
-05/03/2016 18:18:37: ############################################################################## -05/03/2016 18:18:37: # # -05/03/2016 18:18:37: # Action "train" # -05/03/2016 18:18:37: # # -05/03/2016 18:18:37: ############################################################################## +05/03/2016 18:19:00: ############################################################################## +05/03/2016 18:19:00: # # +05/03/2016 18:19:00: # Action "train" # +05/03/2016 18:19:00: # # +05/03/2016 18:19:00: ############################################################################## -05/03/2016 18:18:37: CNTKCommandTrainBegin: SVDTrain +05/03/2016 18:19:00: CNTKCommandTrainBegin: SVDTrain NDLBuilder Using GPU 0 -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/state.list htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Speech/Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 18:18:37: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn.0'. +05/03/2016 18:19:00: Starting from checkpoint. Loading network from '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn.0'. Post-processing network... @@ -703,14 +702,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 18:18:37: Loaded model with 31 nodes on GPU 0. +05/03/2016 18:19:00: Loaded model with 31 nodes on GPU 0. -05/03/2016 18:18:37: Training criterion node(s): -05/03/2016 18:18:37: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 18:19:00: Training criterion node(s): +05/03/2016 18:19:00: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 18:18:37: Evaluation criterion node(s): +05/03/2016 18:19:00: Evaluation criterion node(s): -05/03/2016 18:18:37: EvalErrorPrediction = ErrorPrediction +05/03/2016 18:19:00: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -718,60 +717,63 @@ Allocating matrices for forward and/or backward propagation. 
Memory Sharing Structure: (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *3]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *3]] [PosteriorProb Value[132 x 1 x *3]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *3]] [features Gradient[363 x *3]] [labels Gradient[132 x *3]] } -0x2bf97f8: {[W1-U Value[512 x 128]] } -0x2bfecb8: {[features Value[363 x *3]] } -0x2bff5a8: {[W0*features Value[512 x *3]] [W0-U Gradient[512 x 104]] } -0x2bfff48: {[InvStdOfFeatures Value[363]] } -0x2c010f8: {[W0-SVD Value[512 x 363]] } -0x2c01378: {[H1 Value[512 x 1 x *3]] [W0*features Gradient[512 x *3]] [W0-V Gradient[104 x 363]] } -0x2c01538: {[W0*features+B0 Gradient[512 x 1 x *3]] [W1*H1 Value[512 x 1 x *3]] [W1-V Gradient[128 x 512]] } -0x2c74c98: {[W2-U Value[132 x 32]] } -0x2c93f68: {[W1*H1+B1 Value[512 x 1 x *3]] [W1-SVD Gradient[512 x 512]] [W2-U Gradient[132 x 32]] } -0x2c94128: {[H2 Value[512 x 1 x *3]] [W1*H1 Gradient[512 x 1 x *3]] [W2-V Gradient[32 x 512]] } -0x2c942e8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *3]] [W1*H1+B1 Gradient[512 x 1 x *3]] [W2*H1 Value[132 x 1 x *3]] } -0x2c943e8: {[HLast Value[132 x 1 x *3]] [W2-SVD Gradient[132 x 512]] } -0x2c947d8: {[ScaledLogLikelihood Value[132 x 1 x *3]] } -0x2c94f48: {[W0-V Value[104 x 363]] } -0x2c97b08: {[W2-V Value[32 x 512]] } -0x2c994b8: {[B1 Value[512 x 1]] } -0x2cba288: {[B2 Value[132 x 1]] } -0x3aaf7d8: {[CrossEntropyWithSoftmax Value[1]] } -0x3ab51c8: {[MeanOfFeatures Value[363]] } -0x3abb598: {[W1-V Value[128 x 512]] } -0x3d96568: {[CrossEntropyWithSoftmax Gradient[1]] } -0x3d96728: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *3]] [HLast Gradient[132 x 1 x *3]] } -0x3d968e8: {[W2*H1 Gradient[132 x 1 x *3]] } -0x3d96aa8: {[B2 Gradient[132 x 1]] } -0x3d99f28: {[Prior Value[132]] } -0x3d9bb88: {[W0*features+B0 Value[512 x 1 x *3]] [W0-SVD Gradient[512 x 363]] [W1-U Gradient[512 x 128]] } -0x3dbbaa8: {[W0-U Value[512 x 104]] } -0x3de4d38: {[labels Value[132 x *3]] } -0x3e15fb8: {[B0 Value[512 x 1]] } -0x3e241e8: {[LogOfPrior Value[132]] } -0x3e33a48: {[W2-SVD Value[132 x 512]] } -0x3e33cc8: {[W1-SVD Value[512 x 512]] } -0x3e33e88: {[MVNormalizedFeatures Value[363 x *3]] } -0x3e3b2c8: {[EvalErrorPrediction Value[1]] } +0x17322d8: {[features Value[363 x *3]] } +0x1736198: {[B2 Gradient[132 x 1]] } +0x1736fa8: {[B2 Value[132 x 1]] } +0x178b118: {[W0*features+B0 Value[512 x 1 x *3]] [W0-SVD Gradient[512 x 363]] [W1-U Gradient[512 x 128]] } +0x178b248: {[LogOfPrior Value[132]] } +0x187f3d8: {[ScaledLogLikelihood Value[132 x 1 x *3]] } +0x1884c78: {[CrossEntropyWithSoftmax Value[1]] } +0x189b338: {[H1 Value[512 x 1 x *3]] [W0*features Gradient[512 x *3]] [W0-V Gradient[104 x 363]] } +0x189b7b8: {[W0*features+B0 Gradient[512 x 1 x *3]] [W1*H1 Value[512 x 1 x *3]] [W1-V Gradient[128 x 512]] } +0x189b978: {[W1*H1+B1 Value[512 x 1 x *3]] [W1-SVD Gradient[512 x 512]] [W2-U Gradient[132 x 32]] } +0x189bb38: {[H2 Value[512 x 1 x *3]] [W1*H1 Gradient[512 x 1 x *3]] [W2-V Gradient[32 x 512]] } +0x24af0e8: {[W2-V Value[32 x 512]] } +0x24b2ac8: {[Prior Value[132]] } +0x2502338: {[EvalErrorPrediction Value[1]] } +0x28bb808: {[W2-U Value[132 x 32]] } +0x28ed478: {[MVNormalizedFeatures Value[363 x *3]] } +0x2900aa8: {[B1 Value[512 x 1]] } +0x5b40e28: {[W2-SVD Value[132 x 512]] } +0x5b40fe8: {[W0*features Value[512 x *3]] [W0-U Gradient[512 x 104]] } +0x5b410a8: {[W1-SVD Value[512 x 512]] } +0x5b41328: {[W0-SVD Value[512 x 363]] } 
+0x5b45cb8: {[W1-U Value[512 x 128]] } +0x5b51cc8: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *3]] [W1*H1+B1 Gradient[512 x 1 x *3]] [W2*H1 Value[132 x 1 x *3]] } +0x5b51e88: {[HLast Value[132 x 1 x *3]] [W2-SVD Gradient[132 x 512]] } +0x5b61528: {[B0 Value[512 x 1]] } +0x5b8c778: {[W1-V Value[128 x 512]] } +0x5b8ee28: {[CrossEntropyWithSoftmax Gradient[1]] } +0x5b8efe8: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *3]] [HLast Gradient[132 x 1 x *3]] } +0x5b8f1a8: {[W2*H1 Gradient[132 x 1 x *3]] } +0x5b8fa58: {[W0-U Value[512 x 104]] } +0x5b8fb58: {[W0-V Value[104 x 363]] } +0x5b94138: {[labels Value[132 x *3]] } +0x5b94558: {[MeanOfFeatures Value[363]] } +0x5b94a08: {[InvStdOfFeatures Value[363]] } -05/03/2016 18:18:37: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 18:19:00: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 18:18:37: Starting Epoch 1: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 18:19:00: Starting Epoch 1: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 18:18:37: Starting minibatch loop. -05/03/2016 18:18:37: Epoch[ 1 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86152649 * 10240; EvalErrorPrediction = 0.51777344 * 10240; time = 0.1811s; samplesPerSecond = 56543.7 -05/03/2016 18:18:37: Epoch[ 1 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.81946220 * 10240; EvalErrorPrediction = 0.51054687 * 10240; time = 0.0421s; samplesPerSecond = 243311.3 -05/03/2016 18:18:37: Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.84049435 * 20480; EvalErrorPrediction = 0.51416016 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 9.7656251e-05; epochTime=0.287026s -05/03/2016 18:18:37: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn.1' +05/03/2016 18:19:00: Starting minibatch loop. +05/03/2016 18:19:00: Epoch[ 1 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86152649 * 10240; EvalErrorPrediction = 0.51777344 * 10240; time = 0.0381s; samplesPerSecond = 268470.5 +05/03/2016 18:19:01: Epoch[ 1 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.81946220 * 10240; EvalErrorPrediction = 0.51054687 * 10240; time = 0.0341s; samplesPerSecond = 299976.6 +05/03/2016 18:19:01: Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.84049435 * 20480; EvalErrorPrediction = 0.51416016 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 9.7656251e-05; epochTime=0.186662s +05/03/2016 18:19:01: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn.1' -05/03/2016 18:18:37: Starting Epoch 2: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 18:19:01: Starting Epoch 2: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 18:18:37: Starting minibatch loop. 
-05/03/2016 18:18:37: Epoch[ 2 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.80154209 * 10240; EvalErrorPrediction = 0.50097656 * 10240; time = 0.0705s; samplesPerSecond = 145151.5 -05/03/2016 18:18:37: Epoch[ 2 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.81663570 * 10240; EvalErrorPrediction = 0.50869141 * 10240; time = 0.0441s; samplesPerSecond = 231936.6 -05/03/2016 18:18:37: Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.80908890 * 20480; EvalErrorPrediction = 0.50483398 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 9.7656251e-05; epochTime=0.11613s -05/03/2016 18:18:37: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech/ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn' -05/03/2016 18:18:37: CNTKCommandTrainEnd: SVDTrain +05/03/2016 18:19:01: Starting minibatch loop. +05/03/2016 18:19:01: Epoch[ 2 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.80154209 * 10240; EvalErrorPrediction = 0.50097656 * 10240; time = 0.0354s; samplesPerSecond = 289036.9 +05/03/2016 18:19:01: Epoch[ 2 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.81663570 * 10240; EvalErrorPrediction = 0.50869141 * 10240; time = 0.0341s; samplesPerSecond = 300548.9 +05/03/2016 18:19:01: Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.80908890 * 20480; EvalErrorPrediction = 0.50483398 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 9.7656251e-05; epochTime=0.073052s +05/03/2016 18:19:01: SGD: Saving checkpoint model '/tmp/cntk-test-20160503181449.303380/Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn' +05/03/2016 18:19:01: CNTKCommandTrainEnd: SVDTrain -05/03/2016 18:18:37: Action "train" complete. +05/03/2016 18:19:01: Action "train" complete. 
-05/03/2016 18:18:37: __COMPLETED__ \ No newline at end of file +05/03/2016 18:19:01: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.windows.cpu.txt index 97416dd37..dafe7fd2c 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.windows.cpu.txt @@ -1,4 +1,4 @@ -=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu DeviceId=-1 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu DeviceId=-1 timestamping=true ------------------------------------------------------------------- Build info: @@ -16,31 +16,31 @@ Build info: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ ------------------------------------------------------------------- Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -05/03/2016 14:27:47: ------------------------------------------------------------------- -05/03/2016 14:27:47: Build info: +05/03/2016 14:28:24: ------------------------------------------------------------------- +05/03/2016 14:28:24: Build info: -05/03/2016 14:27:47: Built time: May 3 2016 13:23:06 -05/03/2016 14:27:47: Last modified date: Mon Apr 18 00:00:12 2016 -05/03/2016 14:27:47: Build type: Release -05/03/2016 14:27:47: Build target: GPU -05/03/2016 14:27:47: With 1bit-SGD: no -05/03/2016 14:27:47: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -05/03/2016 14:27:47: CUB_PATH: C:\src\cub-1.4.1 -05/03/2016 14:27:47: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -05/03/2016 14:27:47: Build Branch: HEAD -05/03/2016 14:27:47: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -05/03/2016 14:27:47: Built by svcphil on LIANA-09-w -05/03/2016 14:27:47: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -05/03/2016 14:27:47: ------------------------------------------------------------------- +05/03/2016 14:28:24: Built time: May 3 2016 13:23:06 +05/03/2016 14:28:24: Last modified date: Mon Apr 18 00:00:12 2016 +05/03/2016 14:28:24: Build type: Release +05/03/2016 14:28:24: Build 
target: GPU +05/03/2016 14:28:24: With 1bit-SGD: no +05/03/2016 14:28:24: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +05/03/2016 14:28:24: CUB_PATH: C:\src\cub-1.4.1 +05/03/2016 14:28:24: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +05/03/2016 14:28:24: Build Branch: HEAD +05/03/2016 14:28:24: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +05/03/2016 14:28:24: Built by svcphil on LIANA-09-w +05/03/2016 14:28:24: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +05/03/2016 14:28:24: ------------------------------------------------------------------- -05/03/2016 14:27:47: Running on cntk-muc02 at 2016/05/03 14:27:47 -05/03/2016 14:27:47: Command line: -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu DeviceId=-1 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +05/03/2016 14:28:24: Running on cntk-muc02 at 2016/05/03 14:28:24 +05/03/2016 14:28:24: Command line: +C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu DeviceId=-1 timestamping=true -05/03/2016 14:27:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:47: precision=float +05/03/2016 14:28:24: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:24: precision=float command=speechTrain:modelDecomposition:SVDTrain deviceId=$DeviceId$ speechTrain=[ @@ -118,25 +118,23 @@ SVDTrain=[ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu DeviceId=-1 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 14:27:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:24: <<<<<<<<<<<<<<<<<<<< RAW 
CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:47: precision=float +05/03/2016 14:28:24: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:24: precision=float command=speechTrain:modelDecomposition:SVDTrain deviceId=-1 speechTrain=[ action=train makeMode=false - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.dnn deviceId=-1 traceLevel=1 SimpleNetworkBuilder=[ @@ -181,19 +179,19 @@ reader=[ ] modelDecomposition=[ action=SVD - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn - outputmodelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn.0 + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.dnn + outputmodelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn.0 KeepRatio=0.5 NodeNameRegex=W.* ] SVDTrain=[ action=train makeMode=true - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn deviceId=-1 traceLevel=1 NDLNetworkBuilder=[ - NetworkDescription=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/nonExistent.ndl + NetworkDescription=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/nonExistent.ndl ] SGD=[ epochSize=20480 @@ -208,18 +206,16 @@ SVDTrain=[ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu DeviceId=-1 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 14:27:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:24: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:24: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain:modelDecomposition:SVDTrain configparameters: 
cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data @@ -227,13 +223,13 @@ configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Te configparameters: cntk.cntk:deviceId=-1 configparameters: cntk.cntk:modelDecomposition=[ action=SVD - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn - outputmodelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn.0 + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.dnn + outputmodelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn.0 KeepRatio=0.5 NodeNameRegex=W.* ] -configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu +configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu configparameters: cntk.cntk:precision=float configparameters: cntk.cntk:reader=[ readerType=HTKMLFReader @@ -252,13 +248,13 @@ configparameters: cntk.cntk:reader=[ labelDim=132 labelType=Category ] -] [readerType=ExperimentalHTKMLFReader] [prefetch=true] +] -configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu +configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu configparameters: cntk.cntk:speechTrain=[ action=train makeMode=false - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.dnn deviceId=-1 traceLevel=1 SimpleNetworkBuilder=[ @@ -287,11 +283,11 @@ configparameters: cntk.cntk:speechTrain=[ configparameters: cntk.cntk:SVDTrain=[ action=train makeMode=true - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn deviceId=-1 traceLevel=1 NDLNetworkBuilder=[ - NetworkDescription=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/nonExistent.ndl + NetworkDescription=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/nonExistent.ndl ] SGD=[ epochSize=20480 @@ -307,32 +303,31 @@ configparameters: cntk.cntk:SVDTrain=[ ] configparameters: cntk.cntk:timestamping=true -05/03/2016 14:27:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:47: Commands: speechTrain modelDecomposition SVDTrain -05/03/2016 14:27:47: Precision = "float" -05/03/2016 14:27:47: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn -05/03/2016 14:27:47: CNTKCommandTrainInfo: 
speechTrain : 3 -05/03/2016 14:27:47: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn -05/03/2016 14:27:47: CNTKCommandTrainInfo: SVDTrain : 2 -05/03/2016 14:27:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 5 +05/03/2016 14:28:24: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:24: Commands: speechTrain modelDecomposition SVDTrain +05/03/2016 14:28:24: Precision = "float" +05/03/2016 14:28:24: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.dnn +05/03/2016 14:28:24: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 14:28:24: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn +05/03/2016 14:28:24: CNTKCommandTrainInfo: SVDTrain : 2 +05/03/2016 14:28:24: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 5 -05/03/2016 14:27:47: ############################################################################## -05/03/2016 14:27:47: # # -05/03/2016 14:27:47: # Action "train" # -05/03/2016 14:27:47: # # -05/03/2016 14:27:47: ############################################################################## +05/03/2016 14:28:24: ############################################################################## +05/03/2016 14:28:24: # # +05/03/2016 14:28:24: # Action "train" # +05/03/2016 14:28:24: # # +05/03/2016 14:28:24: ############################################################################## -05/03/2016 14:27:47: CNTKCommandTrainBegin: speechTrain +05/03/2016 14:28:24: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using CPU -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:27:47: Creating virgin network. +05/03/2016 14:28:24: Creating virgin network. Post-processing network... @@ -384,14 +379,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:27:47: Created model with 25 nodes on CPU. +05/03/2016 14:28:24: Created model with 25 nodes on CPU. 
-05/03/2016 14:27:47: Training criterion node(s): -05/03/2016 14:27:47: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 14:28:24: Training criterion node(s): +05/03/2016 14:28:24: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 14:27:47: Evaluation criterion node(s): +05/03/2016 14:28:24: Evaluation criterion node(s): -05/03/2016 14:27:47: EvalErrorPrediction = ErrorPrediction +05/03/2016 14:28:24: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -399,114 +394,119 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -00000078BD1F1E60: {[W0 Value[512 x 363]] } -00000078BD1F20E0: {[W1 Value[512 x 512]] } -00000078BD1F2540: {[B1 Value[512 x 1]] } -00000078BD1F25E0: {[B0 Value[512 x 1]] } -00000078BD1F2720: {[features Value[363 x *]] } -00000078BD1F27C0: {[MeanOfFeatures Value[363]] } -00000078BD1F2900: {[InvStdOfFeatures Value[363]] } -00000078BD269780: {[EvalErrorPrediction Value[1]] } -00000078BD269820: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -00000078BD269960: {[CrossEntropyWithSoftmax Value[1]] } -00000078BD269B40: {[ScaledLogLikelihood Value[132 x 1 x *]] } -00000078BD269D20: {[W0*features Value[512 x *]] } -00000078BD269DC0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -00000078BD269E60: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -00000078BD269F00: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -00000078BD26A040: {[labels Value[132 x *]] } -00000078BD26A220: {[LogOfPrior Value[132]] } -00000078BD26A2C0: {[MVNormalizedFeatures Value[363 x *]] } -00000078BD26A400: {[W2 Value[132 x 512]] } -00000078BD26A540: {[Prior Value[132]] } -00000078BD26A680: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -00000078BD26A9A0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -00000078BD26AC20: {[W2*H1 Gradient[132 x 1 x *]] } -00000078BD26ACC0: {[B2 Gradient[132 x 1]] } -00000078BD26AE00: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -00000078BD26AFE0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } -00000078BD26B080: {[CrossEntropyWithSoftmax Gradient[1]] } -00000078BD26B260: {[B2 Value[132 x 1]] } +00000060624C0630: {[W1 Value[512 x 512]] } +00000060624C09F0: {[W0 Value[512 x 363]] } +00000060624C0B30: {[MeanOfFeatures Value[363]] } +00000060624C0BD0: {[features Value[363 x *]] } +00000060624C1170: {[InvStdOfFeatures Value[363]] } +00000060624C1210: {[B0 Value[512 x 1]] } +00000060624C12B0: {[B1 Value[512 x 1]] } +00000060644DEDD0: {[B2 Gradient[132 x 1]] } +00000060644DEE70: {[LogOfPrior Value[132]] } +00000060644DF050: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +00000060644DF190: {[Prior Value[132]] } +00000060644DF2D0: {[ScaledLogLikelihood Value[132 x 1 x *]] } +00000060644DF5F0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +00000060644DF690: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +00000060644DF9B0: {[W2 Value[132 x 512]] } +00000060644DFD70: 
{[CrossEntropyWithSoftmax Value[1]] } +00000060644DFEB0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +00000060644DFF50: {[B2 Value[132 x 1]] } +00000060644DFFF0: {[labels Value[132 x *]] } +00000060644E01D0: {[EvalErrorPrediction Value[1]] } +00000060644E0270: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } +00000060644E0450: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +00000060644E04F0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +00000060644E0630: {[W0*features Value[512 x *]] } +00000060644E06D0: {[CrossEntropyWithSoftmax Gradient[1]] } +00000060644E0810: {[MVNormalizedFeatures Value[363 x *]] } +00000060644E08B0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +00000060644E0950: {[W2*H1 Gradient[132 x 1 x *]] } -05/03/2016 14:27:47: Precomputing --> 3 PreCompute nodes found. +05/03/2016 14:28:24: Precomputing --> 3 PreCompute nodes found. -05/03/2016 14:27:47: MeanOfFeatures = Mean() -05/03/2016 14:27:47: InvStdOfFeatures = InvStdDev() -05/03/2016 14:27:47: Prior = Mean() +05/03/2016 14:28:24: MeanOfFeatures = Mean() +05/03/2016 14:28:24: InvStdOfFeatures = InvStdDev() +05/03/2016 14:28:24: Prior = Mean() +minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:27:48: Precomputing --> Completed. +05/03/2016 14:28:25: Precomputing --> Completed. -05/03/2016 14:27:48: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +05/03/2016 14:28:25: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -05/03/2016 14:27:48: Starting minibatch loop. 
-05/03/2016 14:27:48: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944885 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.1924s; samplesPerSecond = 3327.0 -05/03/2016 14:27:48: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22300034 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0572s; samplesPerSecond = 11196.4 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971329 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0569s; samplesPerSecond = 11249.2 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341614 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.0568s; samplesPerSecond = 11269.8 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074249 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0570s; samplesPerSecond = 11233.0 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71251984 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.0569s; samplesPerSecond = 11250.6 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563110 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.0570s; samplesPerSecond = 11235.0 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49348450 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.0569s; samplesPerSecond = 11251.4 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34739685 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.0565s; samplesPerSecond = 11337.5 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51961060 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0563s; samplesPerSecond = 11372.7 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656067 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.0560s; samplesPerSecond = 11431.2 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397217 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0560s; samplesPerSecond = 11433.3 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780762 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.0559s; samplesPerSecond = 11448.0 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845886 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0560s; samplesPerSecond = 11433.7 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06457214 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0564s; samplesPerSecond = 11356.4 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91632080 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0559s; samplesPerSecond = 11447.8 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90608521 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.0565s; samplesPerSecond = 11331.2 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74095459 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.0811s; samplesPerSecond = 7888.9 -05/03/2016 14:27:49: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: 
CrossEntropyWithSoftmax = 2.67088013 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.0558s; samplesPerSecond = 11464.6 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67608643 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0558s; samplesPerSecond = 11463.2 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54733276 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0559s; samplesPerSecond = 11443.3 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925659 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.0560s; samplesPerSecond = 11426.3 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52387695 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0558s; samplesPerSecond = 11468.3 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47543945 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.0560s; samplesPerSecond = 11431.0 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265381 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.0559s; samplesPerSecond = 11458.4 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728516 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.0558s; samplesPerSecond = 11462.1 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674561 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.0558s; samplesPerSecond = 11478.6 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020508 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.0562s; samplesPerSecond = 11393.2 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400757 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0560s; samplesPerSecond = 11438.8 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885010 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.0558s; samplesPerSecond = 11474.1 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22711792 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.0559s; samplesPerSecond = 11440.2 -05/03/2016 14:27:50: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604858 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.0558s; samplesPerSecond = 11478.4 -05/03/2016 14:27:50: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704632 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.03663s -05/03/2016 14:27:50: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn.1' +05/03/2016 14:28:25: Starting minibatch loop. 
+05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.46944885 * 640; EvalErrorPrediction = 0.90781250 * 640; time = 0.0634s; samplesPerSecond = 10101.8 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22300034 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0563s; samplesPerSecond = 11360.8 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.93971329 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0567s; samplesPerSecond = 11284.5 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.92341614 * 640; EvalErrorPrediction = 0.90468750 * 640; time = 0.0560s; samplesPerSecond = 11420.2 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84074249 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0566s; samplesPerSecond = 11305.8 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71251984 * 640; EvalErrorPrediction = 0.88437500 * 640; time = 0.0567s; samplesPerSecond = 11281.1 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51563110 * 640; EvalErrorPrediction = 0.82500000 * 640; time = 0.0566s; samplesPerSecond = 11305.2 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49348450 * 640; EvalErrorPrediction = 0.81093750 * 640; time = 0.0562s; samplesPerSecond = 11387.9 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.34739685 * 640; EvalErrorPrediction = 0.76562500 * 640; time = 0.0565s; samplesPerSecond = 11328.0 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.51961060 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0564s; samplesPerSecond = 11351.7 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.24656067 * 640; EvalErrorPrediction = 0.80312500 * 640; time = 0.0567s; samplesPerSecond = 11293.7 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.33397217 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0565s; samplesPerSecond = 11333.7 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.17780762 * 640; EvalErrorPrediction = 0.77031250 * 640; time = 0.0568s; samplesPerSecond = 11266.8 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.09845886 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0568s; samplesPerSecond = 11273.4 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06457214 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0567s; samplesPerSecond = 11296.6 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91632080 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0567s; samplesPerSecond = 11279.9 +05/03/2016 14:28:26: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90608521 * 640; EvalErrorPrediction = 0.73281250 * 640; time = 0.0566s; samplesPerSecond = 11310.4 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74095459 * 640; EvalErrorPrediction = 0.65937500 * 640; time = 0.0566s; samplesPerSecond = 11316.2 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: 
CrossEntropyWithSoftmax = 2.67088013 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.0569s; samplesPerSecond = 11247.2 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67608643 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0567s; samplesPerSecond = 11296.4 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.54733276 * 640; EvalErrorPrediction = 0.62968750 * 640; time = 0.0564s; samplesPerSecond = 11338.3 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61925659 * 640; EvalErrorPrediction = 0.67343750 * 640; time = 0.0564s; samplesPerSecond = 11352.8 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.52387695 * 640; EvalErrorPrediction = 0.65781250 * 640; time = 0.0565s; samplesPerSecond = 11334.1 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.47543945 * 640; EvalErrorPrediction = 0.63437500 * 640; time = 0.0567s; samplesPerSecond = 11284.9 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43265381 * 640; EvalErrorPrediction = 0.61406250 * 640; time = 0.0564s; samplesPerSecond = 11337.9 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.41728516 * 640; EvalErrorPrediction = 0.63125000 * 640; time = 0.0567s; samplesPerSecond = 11292.3 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.17674561 * 640; EvalErrorPrediction = 0.57812500 * 640; time = 0.0567s; samplesPerSecond = 11289.5 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.31020508 * 640; EvalErrorPrediction = 0.64062500 * 640; time = 0.0569s; samplesPerSecond = 11256.7 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.26400757 * 640; EvalErrorPrediction = 0.61093750 * 640; time = 0.0567s; samplesPerSecond = 11287.5 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15885010 * 640; EvalErrorPrediction = 0.58281250 * 640; time = 0.0577s; samplesPerSecond = 11083.0 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.22711792 * 640; EvalErrorPrediction = 0.59218750 * 640; time = 0.0568s; samplesPerSecond = 11277.1 +05/03/2016 14:28:27: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25604858 * 640; EvalErrorPrediction = 0.60625000 * 640; time = 0.0561s; samplesPerSecond = 11410.0 +05/03/2016 14:28:27: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00704632 * 20480; EvalErrorPrediction = 0.72827148 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=1.83325s +05/03/2016 14:28:27: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.dnn.1' -05/03/2016 14:27:50: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +05/03/2016 14:28:27: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 14:27:50: Starting minibatch loop. 
-05/03/2016 14:27:51: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10257511 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.1666s; samplesPerSecond = 15364.1 -05/03/2016 14:27:51: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00548573 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.1674s; samplesPerSecond = 15296.5 -05/03/2016 14:27:51: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00766983 * 2560; EvalErrorPrediction = 0.54960937 * 2560; time = 0.1653s; samplesPerSecond = 15484.9 -05/03/2016 14:27:51: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.92049370 * 2560; EvalErrorPrediction = 0.53281250 * 2560; time = 0.1646s; samplesPerSecond = 15557.2 -05/03/2016 14:27:51: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90178452 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.1641s; samplesPerSecond = 15598.8 -05/03/2016 14:27:51: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91359482 * 2560; EvalErrorPrediction = 0.53984375 * 2560; time = 0.1644s; samplesPerSecond = 15567.2 -05/03/2016 14:27:52: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91765289 * 2560; EvalErrorPrediction = 0.53125000 * 2560; time = 0.1631s; samplesPerSecond = 15695.3 -05/03/2016 14:27:52: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.87682800 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.1634s; samplesPerSecond = 15667.9 -05/03/2016 14:27:52: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.95576057 * 20480; EvalErrorPrediction = 0.53979492 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.32467s -05/03/2016 14:27:52: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn.2' +05/03/2016 14:28:27: Starting minibatch loop. 
+05/03/2016 14:28:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.10257511 * 2560; EvalErrorPrediction = 0.56484375 * 2560; time = 0.1698s; samplesPerSecond = 15072.4 +05/03/2016 14:28:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00548573 * 2560; EvalErrorPrediction = 0.54843750 * 2560; time = 0.1685s; samplesPerSecond = 15188.6 +05/03/2016 14:28:28: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00766983 * 2560; EvalErrorPrediction = 0.54960937 * 2560; time = 0.1678s; samplesPerSecond = 15255.3 +05/03/2016 14:28:28: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.92049370 * 2560; EvalErrorPrediction = 0.53281250 * 2560; time = 0.1686s; samplesPerSecond = 15180.0 +05/03/2016 14:28:28: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.90178452 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.1706s; samplesPerSecond = 15007.7 +05/03/2016 14:28:28: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91359482 * 2560; EvalErrorPrediction = 0.53984375 * 2560; time = 0.1679s; samplesPerSecond = 15242.9 +05/03/2016 14:28:29: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91765289 * 2560; EvalErrorPrediction = 0.53125000 * 2560; time = 0.1674s; samplesPerSecond = 15289.3 +05/03/2016 14:28:29: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.87682800 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.1671s; samplesPerSecond = 15318.8 +05/03/2016 14:28:29: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.95576057 * 20480; EvalErrorPrediction = 0.53979492 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.3529s +05/03/2016 14:28:29: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.dnn.2' -05/03/2016 14:27:52: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 14:28:29: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses -05/03/2016 14:27:52: Starting minibatch loop. -05/03/2016 14:27:52: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593941 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 0.5932s; samplesPerSecond = 17262.7 -05/03/2016 14:27:53: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384575 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 0.5562s; samplesPerSecond = 18409.5 -05/03/2016 14:27:53: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989258 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.1547s -05/03/2016 14:27:53: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.dnn' -05/03/2016 14:27:53: CNTKCommandTrainEnd: speechTrain +05/03/2016 14:28:29: Starting minibatch loop. 
+05/03/2016 14:28:30: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.88593941 * 10240; EvalErrorPrediction = 0.52529297 * 10240; time = 0.5934s; samplesPerSecond = 17257.9 +05/03/2016 14:28:30: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.89384575 * 10240; EvalErrorPrediction = 0.51816406 * 10240; time = 0.5903s; samplesPerSecond = 17347.8 +05/03/2016 14:28:30: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.88989258 * 20480; EvalErrorPrediction = 0.52172852 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.18916s +05/03/2016 14:28:30: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.dnn' +05/03/2016 14:28:30: CNTKCommandTrainEnd: speechTrain -05/03/2016 14:27:53: Action "train" complete. +05/03/2016 14:28:30: Action "train" complete. -05/03/2016 14:27:53: ############################################################################## -05/03/2016 14:27:53: # # -05/03/2016 14:27:53: # Action "SVD" # -05/03/2016 14:27:53: # # -05/03/2016 14:27:53: ############################################################################## +05/03/2016 14:28:30: ############################################################################## +05/03/2016 14:28:30: # # +05/03/2016 14:28:30: # Action "SVD" # +05/03/2016 14:28:30: # # +05/03/2016 14:28:30: ############################################################################## Post-processing network... @@ -623,26 +623,25 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:27:53: Action "SVD" complete. +05/03/2016 14:28:31: Action "SVD" complete. -05/03/2016 14:27:53: ############################################################################## -05/03/2016 14:27:53: # # -05/03/2016 14:27:53: # Action "train" # -05/03/2016 14:27:53: # # -05/03/2016 14:27:53: ############################################################################## +05/03/2016 14:28:31: ############################################################################## +05/03/2016 14:28:31: # # +05/03/2016 14:28:31: # Action "train" # +05/03/2016 14:28:31: # # +05/03/2016 14:28:31: ############################################################################## -05/03/2016 14:27:53: CNTKCommandTrainBegin: SVDTrain +05/03/2016 14:28:31: CNTKCommandTrainBegin: SVDTrain NDLBuilder Using CPU -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:27:54: Starting from checkpoint. 
Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn.0'. +05/03/2016 14:28:31: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn.0'. Post-processing network... @@ -700,14 +699,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:27:54: Loaded model with 31 nodes on CPU. +05/03/2016 14:28:31: Loaded model with 31 nodes on CPU. -05/03/2016 14:27:54: Training criterion node(s): -05/03/2016 14:27:54: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 14:28:31: Training criterion node(s): +05/03/2016 14:28:31: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 14:27:54: Evaluation criterion node(s): +05/03/2016 14:28:31: Evaluation criterion node(s): -05/03/2016 14:27:54: EvalErrorPrediction = ErrorPrediction +05/03/2016 14:28:31: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -715,60 +714,63 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *3]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *3]] [PosteriorProb Value[132 x 1 x *3]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *3]] [features Gradient[363 x *3]] [labels Gradient[132 x *3]] } -00000078BEF69510: {[B2 Value[132 x 1]] } -00000078BEF698D0: {[W0-U Value[512 x 104]] } -00000078BEF69970: {[features Value[363 x *3]] } -00000078BEF69AB0: {[labels Value[132 x *3]] } -00000078BEF69D30: {[W0-V Value[104 x 363]] } -00000078BEF69FB0: {[MeanOfFeatures Value[363]] } -00000078BEF6A690: {[Prior Value[132]] } -00000078BEF6AD70: {[B1 Value[512 x 1]] } -00000078BEF6AE10: {[B0 Value[512 x 1]] } -00000078BEF6AEB0: {[InvStdOfFeatures Value[363]] } -00000078BEF8F7E0: {[ScaledLogLikelihood Value[132 x 1 x *3]] } -00000078BEF8F920: {[W0*features Value[512 x *3]] [W0-U Gradient[512 x 104]] } -00000078BEF8F9C0: {[HLast Value[132 x 1 x *3]] [W2-SVD Gradient[132 x 512]] } -00000078BEF8FBA0: {[W1*H1+B1 Value[512 x 1 x *3]] [W1-SVD Gradient[512 x 512]] [W2-U Gradient[132 x 32]] } -00000078BEF8FCE0: {[CrossEntropyWithSoftmax Gradient[1]] } -00000078BEF8FD80: {[W2*H1 Gradient[132 x 1 x *3]] } -00000078BEF8FE20: {[B2 Gradient[132 x 1]] } -00000078BEF901E0: {[W1-V Value[128 x 512]] } -00000078BEF90280: {[W2-V Value[32 x 512]] } -00000078BEF903C0: {[H2 Value[512 x 1 x *3]] [W1*H1 Gradient[512 x 1 x *3]] [W2-V Gradient[32 x 512]] } -00000078BEF90500: {[W0-SVD Value[512 x 363]] } -00000078BEF905A0: {[LogOfPrior Value[132]] } -00000078BEF90640: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *3]] [HLast Gradient[132 x 1 x *3]] } -00000078BEF90780: {[W0*features+B0 Value[512 x 1 x *3]] [W0-SVD Gradient[512 x 363]] [W1-U Gradient[512 x 128]] } -00000078BEF90820: {[H1 Value[512 x 1 x *3]] [W0*features Gradient[512 x *3]] [W0-V Gradient[104 x 363]] } -00000078BEF90A00: {[EvalErrorPrediction Value[1]] } -00000078BEF90B40: {[W1-U Value[512 x 128]] } -00000078BEF90BE0: {[MVNormalizedFeatures Value[363 x *3]] } -00000078BEF90C80: {[W0*features+B0 Gradient[512 x 1 x *3]] [W1*H1 Value[512 x 1 x *3]] [W1-V Gradient[128 x 512]] } -00000078BEF90D20: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *3]] [W1*H1+B1 Gradient[512 x 1 x 
*3]] [W2*H1 Value[132 x 1 x *3]] } -00000078BEF90DC0: {[W2-U Value[132 x 32]] } -00000078BEF91040: {[CrossEntropyWithSoftmax Value[1]] } -00000078BEF91400: {[W2-SVD Value[132 x 512]] } -00000078BEF91540: {[W1-SVD Value[512 x 512]] } +000000606449C200: {[MeanOfFeatures Value[363]] } +000000606449C340: {[labels Value[132 x *3]] } +000000606449C3E0: {[B2 Value[132 x 1]] } +000000606449C520: {[Prior Value[132]] } +000000606449C840: {[B0 Value[512 x 1]] } +000000606449CF20: {[W1-V Value[128 x 512]] } +000000606449D100: {[features Value[363 x *3]] } +000000606449D600: {[W1-U Value[512 x 128]] } +000000606449D6A0: {[InvStdOfFeatures Value[363]] } +000000606449D9C0: {[W0-V Value[104 x 363]] } +000000606449DA60: {[B1 Value[512 x 1]] } +000000606449DCE0: {[W0-U Value[512 x 104]] } +00000060644D1D30: {[W0*features Value[512 x *3]] [W0-U Gradient[512 x 104]] } +00000060644D1DD0: {[HLast Value[132 x 1 x *3]] [W2-SVD Gradient[132 x 512]] } +00000060644D1E70: {[H2 Value[512 x 1 x *3]] [W1*H1 Gradient[512 x 1 x *3]] [W2-V Gradient[32 x 512]] } +00000060644D20F0: {[W0*features+B0 Value[512 x 1 x *3]] [W0-SVD Gradient[512 x 363]] [W1-U Gradient[512 x 128]] } +00000060644D2230: {[W1*H1+B1 Value[512 x 1 x *3]] [W1-SVD Gradient[512 x 512]] [W2-U Gradient[132 x 32]] } +00000060644D2370: {[CrossEntropyWithSoftmax Gradient[1]] } +00000060644D2410: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *3]] [HLast Gradient[132 x 1 x *3]] } +00000060644D2690: {[B2 Gradient[132 x 1]] } +00000060644D2730: {[W1-SVD Value[512 x 512]] } +00000060644D2CD0: {[H1 Value[512 x 1 x *3]] [W0*features Gradient[512 x *3]] [W0-V Gradient[104 x 363]] } +00000060644D2D70: {[W2-U Value[132 x 32]] } +00000060644D2F50: {[LogOfPrior Value[132]] } +00000060644D2FF0: {[W0*features+B0 Gradient[512 x 1 x *3]] [W1*H1 Value[512 x 1 x *3]] [W1-V Gradient[128 x 512]] } +00000060644D3310: {[EvalErrorPrediction Value[1]] } +00000060644D33B0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *3]] [W1*H1+B1 Gradient[512 x 1 x *3]] [W2*H1 Value[132 x 1 x *3]] } +00000060644D34F0: {[W0-SVD Value[512 x 363]] } +00000060644D3590: {[W2*H1 Gradient[132 x 1 x *3]] } +00000060644D3630: {[ScaledLogLikelihood Value[132 x 1 x *3]] } +00000060644D3770: {[W2-V Value[32 x 512]] } +00000060644D3A90: {[CrossEntropyWithSoftmax Value[1]] } +00000060644D3B30: {[W2-SVD Value[132 x 512]] } +00000060644D3BD0: {[MVNormalizedFeatures Value[363 x *3]] } -05/03/2016 14:27:54: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 14:28:31: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 14:27:54: Starting Epoch 1: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 14:28:31: Starting Epoch 1: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:27:54: Starting minibatch loop. 
-05/03/2016 14:27:55: Epoch[ 1 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89942341 * 10240; EvalErrorPrediction = 0.51787109 * 10240; time = 0.7715s; samplesPerSecond = 13272.8 -05/03/2016 14:27:55: Epoch[ 1 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.81449413 * 10240; EvalErrorPrediction = 0.50478516 * 10240; time = 0.6280s; samplesPerSecond = 16305.5 -05/03/2016 14:27:55: Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.85695877 * 20480; EvalErrorPrediction = 0.51132813 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 9.7656251e-005; epochTime=1.46681s -05/03/2016 14:27:55: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn.1' +05/03/2016 14:28:31: Starting minibatch loop. +05/03/2016 14:28:32: Epoch[ 1 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89942341 * 10240; EvalErrorPrediction = 0.51787109 * 10240; time = 0.6512s; samplesPerSecond = 15725.9 +05/03/2016 14:28:32: Epoch[ 1 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.81449413 * 10240; EvalErrorPrediction = 0.50478516 * 10240; time = 0.6213s; samplesPerSecond = 16480.4 +05/03/2016 14:28:32: Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.85695877 * 20480; EvalErrorPrediction = 0.51132813 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 9.7656251e-005; epochTime=1.44439s +05/03/2016 14:28:32: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn.1' -05/03/2016 14:27:55: Starting Epoch 2: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 14:28:33: Starting Epoch 2: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 14:27:55: Starting minibatch loop. -05/03/2016 14:27:56: Epoch[ 2 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.83477077 * 10240; EvalErrorPrediction = 0.50781250 * 10240; time = 0.6250s; samplesPerSecond = 16384.0 -05/03/2016 14:27:56: Epoch[ 2 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.80250683 * 10240; EvalErrorPrediction = 0.50664062 * 10240; time = 0.6030s; samplesPerSecond = 16982.3 -05/03/2016 14:27:56: Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.81863880 * 20480; EvalErrorPrediction = 0.50722656 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 9.7656251e-005; epochTime=1.23144s -05/03/2016 14:27:56: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_cpu/models/cntkSpeech.svd.dnn' -05/03/2016 14:27:56: CNTKCommandTrainEnd: SVDTrain +05/03/2016 14:28:33: Starting minibatch loop. 
+05/03/2016 14:28:33: Epoch[ 2 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.83477077 * 10240; EvalErrorPrediction = 0.50781250 * 10240; time = 0.6380s; samplesPerSecond = 16050.9 +05/03/2016 14:28:34: Epoch[ 2 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.80250683 * 10240; EvalErrorPrediction = 0.50664062 * 10240; time = 0.6308s; samplesPerSecond = 16234.1 +05/03/2016 14:28:34: Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.81863880 * 20480; EvalErrorPrediction = 0.50722656 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 9.7656251e-005; epochTime=1.27411s +05/03/2016 14:28:34: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_cpu/models/cntkSpeech.svd.dnn' +05/03/2016 14:28:34: CNTKCommandTrainEnd: SVDTrain -05/03/2016 14:27:56: Action "train" complete. +05/03/2016 14:28:34: Action "train" complete. -05/03/2016 14:27:56: __COMPLETED__ \ No newline at end of file +05/03/2016 14:28:34: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.windows.gpu.txt index b92004d9f..f1ca6d8e2 100644 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/baseline.windows.gpu.txt @@ -1,4 +1,4 @@ -=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu DeviceId=0 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu DeviceId=0 timestamping=true ------------------------------------------------------------------- Build info: @@ -16,31 +16,31 @@ Build info: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ ------------------------------------------------------------------- Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -05/03/2016 14:27:58: ------------------------------------------------------------------- -05/03/2016 14:27:58: Build info: +05/03/2016 14:28:35: ------------------------------------------------------------------- +05/03/2016 14:28:35: Build info: -05/03/2016 14:27:58: Built time: May 3 2016 
13:23:06 -05/03/2016 14:27:58: Last modified date: Mon Apr 18 00:00:12 2016 -05/03/2016 14:27:58: Build type: Release -05/03/2016 14:27:58: Build target: GPU -05/03/2016 14:27:58: With 1bit-SGD: no -05/03/2016 14:27:58: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -05/03/2016 14:27:58: CUB_PATH: C:\src\cub-1.4.1 -05/03/2016 14:27:58: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -05/03/2016 14:27:58: Build Branch: HEAD -05/03/2016 14:27:58: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 -05/03/2016 14:27:58: Built by svcphil on LIANA-09-w -05/03/2016 14:27:58: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -05/03/2016 14:27:58: ------------------------------------------------------------------- +05/03/2016 14:28:35: Built time: May 3 2016 13:23:06 +05/03/2016 14:28:35: Last modified date: Mon Apr 18 00:00:12 2016 +05/03/2016 14:28:35: Build type: Release +05/03/2016 14:28:35: Build target: GPU +05/03/2016 14:28:35: With 1bit-SGD: no +05/03/2016 14:28:35: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 +05/03/2016 14:28:35: CUB_PATH: C:\src\cub-1.4.1 +05/03/2016 14:28:35: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda +05/03/2016 14:28:35: Build Branch: HEAD +05/03/2016 14:28:35: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 +05/03/2016 14:28:35: Built by svcphil on LIANA-09-w +05/03/2016 14:28:35: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ +05/03/2016 14:28:35: ------------------------------------------------------------------- -05/03/2016 14:27:58: Running on cntk-muc02 at 2016/05/03 14:27:58 -05/03/2016 14:27:58: Command line: -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu DeviceId=0 timestamping=true reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true] +05/03/2016 14:28:35: Running on cntk-muc02 at 2016/05/03 14:28:35 +05/03/2016 14:28:35: Command line: +C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu DeviceId=0 timestamping=true -05/03/2016 14:27:58: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:58: precision=float +05/03/2016 14:28:35: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:35: precision=float command=speechTrain:modelDecomposition:SVDTrain deviceId=$DeviceId$ speechTrain=[ @@ -118,25 +118,23 @@ SVDTrain=[ ] ] 
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu DeviceId=0 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 14:27:58: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:35: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:58: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -05/03/2016 14:27:58: precision=float +05/03/2016 14:28:35: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:35: precision=float command=speechTrain:modelDecomposition:SVDTrain deviceId=0 speechTrain=[ action=train makeMode=false - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.dnn deviceId=0 traceLevel=1 SimpleNetworkBuilder=[ @@ -181,19 +179,19 @@ reader=[ ] modelDecomposition=[ action=SVD - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn - outputmodelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn.0 + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.dnn + outputmodelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn.0 KeepRatio=0.5 NodeNameRegex=W.* ] SVDTrain=[ action=train makeMode=true - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn deviceId=0 traceLevel=1 NDLNetworkBuilder=[ - NetworkDescription=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/nonExistent.ndl + NetworkDescription=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/nonExistent.ndl ] SGD=[ epochSize=20480 @@ -208,18 +206,16 @@ SVDTrain=[ ] ] currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu +RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data 
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD -OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu +OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu DeviceId=0 timestamping=true -reader=[readerType=ExperimentalHTKMLFReader] -reader=[prefetch=true] -05/03/2016 14:27:58: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:35: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:58: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 14:28:35: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> configparameters: cntk.cntk:command=speechTrain:modelDecomposition:SVDTrain configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\SVD configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data @@ -227,13 +223,13 @@ configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Te configparameters: cntk.cntk:deviceId=0 configparameters: cntk.cntk:modelDecomposition=[ action=SVD - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn - outputmodelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn.0 + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.dnn + outputmodelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn.0 KeepRatio=0.5 NodeNameRegex=W.* ] -configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu +configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu configparameters: cntk.cntk:precision=float configparameters: cntk.cntk:reader=[ readerType=HTKMLFReader @@ -252,13 +248,13 @@ configparameters: cntk.cntk:reader=[ labelDim=132 labelType=Category ] -] [readerType=ExperimentalHTKMLFReader] [prefetch=true] +] -configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu +configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu configparameters: cntk.cntk:speechTrain=[ action=train makeMode=false - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.dnn deviceId=0 traceLevel=1 SimpleNetworkBuilder=[ @@ -287,11 +283,11 @@ configparameters: cntk.cntk:speechTrain=[ configparameters: cntk.cntk:SVDTrain=[ action=train makeMode=true - modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn + 
modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn deviceId=0 traceLevel=1 NDLNetworkBuilder=[ - NetworkDescription=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/nonExistent.ndl + NetworkDescription=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/nonExistent.ndl ] SGD=[ epochSize=20480 @@ -307,32 +303,31 @@ configparameters: cntk.cntk:SVDTrain=[ ] configparameters: cntk.cntk:timestamping=true -05/03/2016 14:27:58: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -05/03/2016 14:27:58: Commands: speechTrain modelDecomposition SVDTrain -05/03/2016 14:27:58: Precision = "float" -05/03/2016 14:27:58: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn -05/03/2016 14:27:58: CNTKCommandTrainInfo: speechTrain : 3 -05/03/2016 14:27:58: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn -05/03/2016 14:27:58: CNTKCommandTrainInfo: SVDTrain : 2 -05/03/2016 14:27:58: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 5 +05/03/2016 14:28:35: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:28:35: Commands: speechTrain modelDecomposition SVDTrain +05/03/2016 14:28:35: Precision = "float" +05/03/2016 14:28:35: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.dnn +05/03/2016 14:28:35: CNTKCommandTrainInfo: speechTrain : 3 +05/03/2016 14:28:35: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn +05/03/2016 14:28:35: CNTKCommandTrainInfo: SVDTrain : 2 +05/03/2016 14:28:35: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 5 -05/03/2016 14:27:58: ############################################################################## -05/03/2016 14:27:58: # # -05/03/2016 14:27:58: # Action "train" # -05/03/2016 14:27:58: # # -05/03/2016 14:27:58: ############################################################################## +05/03/2016 14:28:35: ############################################################################## +05/03/2016 14:28:35: # # +05/03/2016 14:28:35: # Action "train" # +05/03/2016 14:28:35: # # +05/03/2016 14:28:35: ############################################################################## -05/03/2016 14:27:58: CNTKCommandTrainBegin: speechTrain +05/03/2016 14:28:35: CNTKCommandTrainBegin: speechTrain SimpleNetworkBuilder Using GPU 0 -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... 
total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:27:58: Creating virgin network. +05/03/2016 14:28:35: Creating virgin network. Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 Post-processing network... @@ -385,14 +380,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:27:58: Created model with 25 nodes on GPU 0. +05/03/2016 14:28:36: Created model with 25 nodes on GPU 0. -05/03/2016 14:27:58: Training criterion node(s): -05/03/2016 14:27:58: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 14:28:36: Training criterion node(s): +05/03/2016 14:28:36: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 14:27:58: Evaluation criterion node(s): +05/03/2016 14:28:36: Evaluation criterion node(s): -05/03/2016 14:27:58: EvalErrorPrediction = ErrorPrediction +05/03/2016 14:28:36: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -400,114 +395,119 @@ Allocating matrices for forward and/or backward propagation. Memory Sharing Structure: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] } -000000E358371A00: {[features Value[363 x *]] } -000000E370BE4930: {[W1 Value[512 x 512]] } -000000E370BE4B10: {[B2 Value[132 x 1]] } -000000E370BE4C50: {[B1 Value[512 x 1]] } -000000E370BE51F0: {[W2 Value[132 x 512]] } -000000E370BE53D0: {[B0 Value[512 x 1]] } -000000E370BE5830: {[InvStdOfFeatures Value[363]] } -000000E370BE5970: {[MeanOfFeatures Value[363]] } -000000E370BE5E70: {[W0 Value[512 x 363]] } -000000E37375E640: {[ScaledLogLikelihood Value[132 x 1 x *]] } -000000E37375E6E0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } -000000E37375E780: {[EvalErrorPrediction Value[1]] } -000000E37375E820: {[Prior Value[132]] } -000000E37375EB40: {[LogOfPrior Value[132]] } -000000E37375EDC0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } -000000E37375F0E0: {[MVNormalizedFeatures Value[363 x *]] } -000000E37375F180: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -000000E37375F2C0: {[CrossEntropyWithSoftmax Gradient[1]] } -000000E37375F540: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } -000000E37375F720: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } -000000E37375F9A0: {[W2*H1 Gradient[132 x 1 x *]] } -000000E37375FB80: {[CrossEntropyWithSoftmax Value[1]] } -000000E37375FC20: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } -000000E37375FCC0: {[W0*features Value[512 x *]] } -000000E37375FD60: {[labels Value[132 x *]] } -000000E373760120: {[B2 Gradient[132 x 1]] } -000000E373760300: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } -000000E3737604E0: {[H1 
Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +00000091C20F13F0: {[features Value[363 x *]] } +00000091DBD7C570: {[W2 Value[132 x 512]] } +00000091DBD7CA70: {[W1 Value[512 x 512]] } +00000091DBD7CED0: {[B0 Value[512 x 1]] } +00000091DBD7D3D0: {[B1 Value[512 x 1]] } +00000091DBD7D510: {[MeanOfFeatures Value[363]] } +00000091DBD7D650: {[B2 Value[132 x 1]] } +00000091DBD7D8D0: {[labels Value[132 x *]] } +00000091DBD7DAB0: {[InvStdOfFeatures Value[363]] } +00000091DBD7DDD0: {[W0 Value[512 x 363]] } +00000091DC2BD1A0: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] } +00000091DC2BD380: {[MVNormalizedFeatures Value[363 x *]] } +00000091DC2BD420: {[W0*features Value[512 x *]] } +00000091DC2BD560: {[CrossEntropyWithSoftmax Gradient[1]] } +00000091DC2BD740: {[W2*H1 Gradient[132 x 1 x *]] } +00000091DC2BD920: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] } +00000091DC2BD9C0: {[B2 Gradient[132 x 1]] } +00000091DC2BDD80: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] } +00000091DC2BDF60: {[ScaledLogLikelihood Value[132 x 1 x *]] } +00000091DC2BE280: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] } +00000091DC2BE320: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] } +00000091DC2BE460: {[CrossEntropyWithSoftmax Value[1]] } +00000091DC2BE640: {[LogOfPrior Value[132]] } +00000091DC2BEAA0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] } +00000091DC2BEC80: {[Prior Value[132]] } +00000091DC2BEDC0: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] } +00000091DC2BEE60: {[EvalErrorPrediction Value[1]] } +00000091DC2BEF00: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] } -05/03/2016 14:27:58: Precomputing --> 3 PreCompute nodes found. +05/03/2016 14:28:36: Precomputing --> 3 PreCompute nodes found. -05/03/2016 14:27:58: MeanOfFeatures = Mean() -05/03/2016 14:27:58: InvStdOfFeatures = InvStdDev() -05/03/2016 14:27:58: Prior = Mean() +05/03/2016 14:28:36: MeanOfFeatures = Mean() +05/03/2016 14:28:36: InvStdOfFeatures = InvStdDev() +05/03/2016 14:28:36: Prior = Mean() +minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:28:01: Precomputing --> Completed. +05/03/2016 14:28:38: Precomputing --> Completed. -05/03/2016 14:28:01: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +05/03/2016 14:28:38: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -05/03/2016 14:28:01: Starting minibatch loop. 
-05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.45645981 * 640; EvalErrorPrediction = 0.92500000 * 640; time = 0.1597s; samplesPerSecond = 4008.5 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22315750 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0242s; samplesPerSecond = 26401.6 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.95180588 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0257s; samplesPerSecond = 24895.0 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.94158096 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0258s; samplesPerSecond = 24787.9 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.85668945 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0257s; samplesPerSecond = 24882.4 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72866364 * 640; EvalErrorPrediction = 0.89531250 * 640; time = 0.0255s; samplesPerSecond = 25096.1 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51809235 * 640; EvalErrorPrediction = 0.82968750 * 640; time = 0.0254s; samplesPerSecond = 25237.6 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.48455200 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0250s; samplesPerSecond = 25581.6 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33829346 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0252s; samplesPerSecond = 25398.8 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.50167236 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0247s; samplesPerSecond = 25944.5 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.22861633 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0248s; samplesPerSecond = 25794.0 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.32616882 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.0248s; samplesPerSecond = 25839.8 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.16897583 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.0246s; samplesPerSecond = 26057.6 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.08891907 * 640; EvalErrorPrediction = 0.77656250 * 640; time = 0.0250s; samplesPerSecond = 25616.4 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06005249 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0247s; samplesPerSecond = 25897.3 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91128540 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0248s; samplesPerSecond = 25831.4 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90172119 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0248s; samplesPerSecond = 25804.4 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.73262329 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0247s; samplesPerSecond = 25918.3 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: 
CrossEntropyWithSoftmax = 2.66515503 * 640; EvalErrorPrediction = 0.68437500 * 640; time = 0.0246s; samplesPerSecond = 26034.3 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67383423 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0246s; samplesPerSecond = 26065.0 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.52869263 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0246s; samplesPerSecond = 26020.5 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60032349 * 640; EvalErrorPrediction = 0.66718750 * 640; time = 0.0246s; samplesPerSecond = 25998.3 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51134033 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0246s; samplesPerSecond = 26060.8 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.45362549 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0246s; samplesPerSecond = 26054.4 -05/03/2016 14:28:01: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.41640015 * 640; EvalErrorPrediction = 0.61562500 * 640; time = 0.0246s; samplesPerSecond = 25976.1 -05/03/2016 14:28:02: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39745483 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0246s; samplesPerSecond = 26007.8 -05/03/2016 14:28:02: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.16415405 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.0247s; samplesPerSecond = 25919.3 -05/03/2016 14:28:02: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.30347290 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0245s; samplesPerSecond = 26087.3 -05/03/2016 14:28:02: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.24398804 * 640; EvalErrorPrediction = 0.60937500 * 640; time = 0.0248s; samplesPerSecond = 25810.6 -05/03/2016 14:28:02: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15322266 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.0249s; samplesPerSecond = 25712.1 -05/03/2016 14:28:02: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.21664429 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.0249s; samplesPerSecond = 25719.3 -05/03/2016 14:28:02: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25246582 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.0246s; samplesPerSecond = 25993.0 -05/03/2016 14:28:02: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00000324 * 20480; EvalErrorPrediction = 0.72836914 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=0.991814s -05/03/2016 14:28:02: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn.1' +05/03/2016 14:28:38: Starting minibatch loop. 
+05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.45645981 * 640; EvalErrorPrediction = 0.92500000 * 640; time = 0.0292s; samplesPerSecond = 21953.1 +05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.22315750 * 640; EvalErrorPrediction = 0.90156250 * 640; time = 0.0239s; samplesPerSecond = 26769.3 +05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.95180588 * 640; EvalErrorPrediction = 0.84687500 * 640; time = 0.0239s; samplesPerSecond = 26789.5 +05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.94158096 * 640; EvalErrorPrediction = 0.89843750 * 640; time = 0.0238s; samplesPerSecond = 26902.1 +05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.85668945 * 640; EvalErrorPrediction = 0.91093750 * 640; time = 0.0238s; samplesPerSecond = 26945.1 +05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.72866364 * 640; EvalErrorPrediction = 0.89531250 * 640; time = 0.0236s; samplesPerSecond = 27109.5 +05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.51809235 * 640; EvalErrorPrediction = 0.82968750 * 640; time = 0.0238s; samplesPerSecond = 26941.7 +05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.48455200 * 640; EvalErrorPrediction = 0.80781250 * 640; time = 0.0237s; samplesPerSecond = 27039.6 +05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.33829346 * 640; EvalErrorPrediction = 0.76875000 * 640; time = 0.0238s; samplesPerSecond = 26871.6 +05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.50167236 * 640; EvalErrorPrediction = 0.79843750 * 640; time = 0.0239s; samplesPerSecond = 26763.7 +05/03/2016 14:28:38: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.22861633 * 640; EvalErrorPrediction = 0.80000000 * 640; time = 0.0236s; samplesPerSecond = 27063.6 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.32616882 * 640; EvalErrorPrediction = 0.79062500 * 640; time = 0.0236s; samplesPerSecond = 27085.4 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.16897583 * 640; EvalErrorPrediction = 0.77968750 * 640; time = 0.0239s; samplesPerSecond = 26809.7 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.08891907 * 640; EvalErrorPrediction = 0.77656250 * 640; time = 0.0237s; samplesPerSecond = 27029.3 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 3.06005249 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0237s; samplesPerSecond = 27027.0 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 2.91128540 * 640; EvalErrorPrediction = 0.69531250 * 640; time = 0.0236s; samplesPerSecond = 27061.3 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.90172119 * 640; EvalErrorPrediction = 0.72968750 * 640; time = 0.0236s; samplesPerSecond = 27072.8 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.73262329 * 640; EvalErrorPrediction = 0.65312500 * 640; time = 0.0230s; samplesPerSecond = 27818.8 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: 
CrossEntropyWithSoftmax = 2.66515503 * 640; EvalErrorPrediction = 0.68437500 * 640; time = 0.0228s; samplesPerSecond = 28124.5 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.67383423 * 640; EvalErrorPrediction = 0.66406250 * 640; time = 0.0233s; samplesPerSecond = 27461.9 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.52869263 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0237s; samplesPerSecond = 27033.9 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60032349 * 640; EvalErrorPrediction = 0.66718750 * 640; time = 0.0238s; samplesPerSecond = 26936.0 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.51134033 * 640; EvalErrorPrediction = 0.64843750 * 640; time = 0.0236s; samplesPerSecond = 27100.3 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.45362549 * 640; EvalErrorPrediction = 0.63750000 * 640; time = 0.0236s; samplesPerSecond = 27137.0 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.41640015 * 640; EvalErrorPrediction = 0.61562500 * 640; time = 0.0237s; samplesPerSecond = 27025.9 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39745483 * 640; EvalErrorPrediction = 0.62812500 * 640; time = 0.0238s; samplesPerSecond = 26934.9 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.16415405 * 640; EvalErrorPrediction = 0.56718750 * 640; time = 0.0239s; samplesPerSecond = 26743.6 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.30347290 * 640; EvalErrorPrediction = 0.63593750 * 640; time = 0.0236s; samplesPerSecond = 27084.2 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.24398804 * 640; EvalErrorPrediction = 0.60937500 * 640; time = 0.0237s; samplesPerSecond = 27055.6 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.15322266 * 640; EvalErrorPrediction = 0.57968750 * 640; time = 0.0238s; samplesPerSecond = 26890.8 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.21664429 * 640; EvalErrorPrediction = 0.59531250 * 640; time = 0.0232s; samplesPerSecond = 27581.5 +05/03/2016 14:28:39: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.25246582 * 640; EvalErrorPrediction = 0.60156250 * 640; time = 0.0228s; samplesPerSecond = 28055.4 +05/03/2016 14:28:39: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.00000324 * 20480; EvalErrorPrediction = 0.72836914 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=0.763191s +05/03/2016 14:28:39: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.dnn.1' -05/03/2016 14:28:02: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +05/03/2016 14:28:39: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 14:28:02: Starting minibatch loop. 
-05/03/2016 14:28:02: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08151951 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.0487s; samplesPerSecond = 52543.0 -05/03/2016 14:28:02: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98395710 * 2560; EvalErrorPrediction = 0.54257813 * 2560; time = 0.0408s; samplesPerSecond = 62777.4 -05/03/2016 14:28:02: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98575516 * 2560; EvalErrorPrediction = 0.54492188 * 2560; time = 0.0408s; samplesPerSecond = 62738.9 -05/03/2016 14:28:02: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.90485039 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0407s; samplesPerSecond = 62873.0 -05/03/2016 14:28:02: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.88324280 * 2560; EvalErrorPrediction = 0.52539063 * 2560; time = 0.0403s; samplesPerSecond = 63525.1 -05/03/2016 14:28:02: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.89109344 * 2560; EvalErrorPrediction = 0.53359375 * 2560; time = 0.0402s; samplesPerSecond = 63706.9 -05/03/2016 14:28:02: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89496002 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.0396s; samplesPerSecond = 64675.9 -05/03/2016 14:28:02: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.85944366 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.0397s; samplesPerSecond = 64470.6 -05/03/2016 14:28:02: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.93560276 * 20480; EvalErrorPrediction = 0.53603516 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.333682s -05/03/2016 14:28:02: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn.2' +05/03/2016 14:28:39: Starting minibatch loop. 
+05/03/2016 14:28:39: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08151951 * 2560; EvalErrorPrediction = 0.55859375 * 2560; time = 0.0453s; samplesPerSecond = 56458.5 +05/03/2016 14:28:39: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 1.98395710 * 2560; EvalErrorPrediction = 0.54257813 * 2560; time = 0.0388s; samplesPerSecond = 66049.2 +05/03/2016 14:28:39: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.98575516 * 2560; EvalErrorPrediction = 0.54492188 * 2560; time = 0.0388s; samplesPerSecond = 66021.9 +05/03/2016 14:28:39: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.90485039 * 2560; EvalErrorPrediction = 0.53164062 * 2560; time = 0.0383s; samplesPerSecond = 66893.1 +05/03/2016 14:28:39: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.88324280 * 2560; EvalErrorPrediction = 0.52539063 * 2560; time = 0.0386s; samplesPerSecond = 66290.3 +05/03/2016 14:28:39: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.89109344 * 2560; EvalErrorPrediction = 0.53359375 * 2560; time = 0.0388s; samplesPerSecond = 65909.7 +05/03/2016 14:28:39: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.89496002 * 2560; EvalErrorPrediction = 0.52890625 * 2560; time = 0.0390s; samplesPerSecond = 65599.0 +05/03/2016 14:28:39: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.85944366 * 2560; EvalErrorPrediction = 0.52265625 * 2560; time = 0.0393s; samplesPerSecond = 65077.0 +05/03/2016 14:28:39: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.93560276 * 20480; EvalErrorPrediction = 0.53603516 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.318713s +05/03/2016 14:28:39: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.dnn.2' -05/03/2016 14:28:02: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 14:28:39: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 1, with 1 datapasses -05/03/2016 14:28:02: Starting minibatch loop. -05/03/2016 14:28:02: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752853 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.1303s; samplesPerSecond = 78601.7 -05/03/2016 14:28:03: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358780 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1132s; samplesPerSecond = 90453.0 -05/03/2016 14:28:03: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055817 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.24729s -05/03/2016 14:28:03: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.dnn' -05/03/2016 14:28:03: CNTKCommandTrainEnd: speechTrain +05/03/2016 14:28:39: Starting minibatch loop. 
+05/03/2016 14:28:40: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.86752853 * 10240; EvalErrorPrediction = 0.52177734 * 10240; time = 0.1269s; samplesPerSecond = 80669.3 +05/03/2016 14:28:40: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.87358780 * 10240; EvalErrorPrediction = 0.51542969 * 10240; time = 0.1130s; samplesPerSecond = 90641.9 +05/03/2016 14:28:40: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.87055817 * 20480; EvalErrorPrediction = 0.51860352 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.244068s +05/03/2016 14:28:40: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.dnn' +05/03/2016 14:28:40: CNTKCommandTrainEnd: speechTrain -05/03/2016 14:28:03: Action "train" complete. +05/03/2016 14:28:40: Action "train" complete. -05/03/2016 14:28:03: ############################################################################## -05/03/2016 14:28:03: # # -05/03/2016 14:28:03: # Action "SVD" # -05/03/2016 14:28:03: # # -05/03/2016 14:28:03: ############################################################################## +05/03/2016 14:28:40: ############################################################################## +05/03/2016 14:28:40: # # +05/03/2016 14:28:40: # Action "SVD" # +05/03/2016 14:28:40: # # +05/03/2016 14:28:40: ############################################################################## Post-processing network... @@ -565,7 +565,7 @@ ParameterSVD: start to process group 0 with KeepRatio=0.50 -------------------------------------------------------------------------------------------- Performing SVD for a 512-by-363 matrix (node name: W0 ) --- computation time 0.10 secs ; keep 50.0% energy ===> keep 104 svd values (reduce to 49.0% parameters) Performing SVD for a 512-by-512 matrix (node name: W1 ) --- computation time 0.16 secs ; keep 50.0% energy ===> keep 128 svd values (reduce to 50.0% parameters) -Performing SVD for a 132-by-512 matrix (node name: W2 ) --- computation time 0.02 secs ; keep 50.0% energy ===> keep 32 svd values (reduce to 30.5% parameters) +Performing SVD for a 132-by-512 matrix (node name: W2 ) --- computation time 0.03 secs ; keep 50.0% energy ===> keep 32 svd values (reduce to 30.5% parameters) Post-processing network... @@ -624,26 +624,25 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:28:03: Action "SVD" complete. +05/03/2016 14:28:40: Action "SVD" complete. -05/03/2016 14:28:03: ############################################################################## -05/03/2016 14:28:03: # # -05/03/2016 14:28:03: # Action "train" # -05/03/2016 14:28:03: # # -05/03/2016 14:28:03: ############################################################################## +05/03/2016 14:28:40: ############################################################################## +05/03/2016 14:28:40: # # +05/03/2016 14:28:40: # Action "train" # +05/03/2016 14:28:40: # # +05/03/2016 14:28:40: ############################################################################## -05/03/2016 14:28:03: CNTKCommandTrainBegin: SVDTrain +05/03/2016 14:28:40: CNTKCommandTrainBegin: SVDTrain NDLBuilder Using GPU 0 -Reading script file glob_0000.scp ... 948 entries -HTKDataDeserializer::HTKDataDeserializer: 948 utterances grouped into 3 chunks, av. 
chunk size: 316.0 utterances, 84244.7 frames -HTKDataDeserializer::HTKDataDeserializer: determined feature kind as 363-dimensional 'USER' with frame shift 10.0 ms +reading script file glob_0000.scp ... 948 entries total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MLFDataDeserializer::MLFDataDeserializer: read 252734 sequences -MLFDataDeserializer::MLFDataDeserializer: read 948 utterances +...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances +label set 0: 129 classes +minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -05/03/2016 14:28:03: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn.0'. +05/03/2016 14:28:40: Starting from checkpoint. Loading network from 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn.0'. Post-processing network... @@ -701,14 +700,14 @@ Validating network, final pass. Post-processing network complete. -05/03/2016 14:28:03: Loaded model with 31 nodes on GPU 0. +05/03/2016 14:28:40: Loaded model with 31 nodes on GPU 0. -05/03/2016 14:28:03: Training criterion node(s): -05/03/2016 14:28:03: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax +05/03/2016 14:28:40: Training criterion node(s): +05/03/2016 14:28:40: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -05/03/2016 14:28:03: Evaluation criterion node(s): +05/03/2016 14:28:40: Evaluation criterion node(s): -05/03/2016 14:28:03: EvalErrorPrediction = ErrorPrediction +05/03/2016 14:28:40: EvalErrorPrediction = ErrorPrediction Allocating matrices for forward and/or backward propagation. @@ -716,60 +715,63 @@ Allocating matrices for forward and/or backward propagation. 
Memory Sharing Structure: 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *3]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *3]] [PosteriorProb Value[132 x 1 x *3]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *3]] [features Gradient[363 x *3]] [labels Gradient[132 x *3]] } -000000E370BE4610: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *3]] [W1*H1+B1 Gradient[512 x 1 x *3]] [W2*H1 Value[132 x 1 x *3]] } -000000E370BE4A70: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *3]] [HLast Gradient[132 x 1 x *3]] } -000000E370BE50B0: {[H2 Value[512 x 1 x *3]] [W1*H1 Gradient[512 x 1 x *3]] [W2-V Gradient[32 x 512]] } -000000E370BE5150: {[HLast Value[132 x 1 x *3]] [W2-SVD Gradient[132 x 512]] } -000000E370BE5290: {[CrossEntropyWithSoftmax Gradient[1]] } -000000E370BE53D0: {[W1*H1+B1 Value[512 x 1 x *3]] [W1-SVD Gradient[512 x 512]] [W2-U Gradient[132 x 32]] } -000000E370BE5470: {[W2*H1 Gradient[132 x 1 x *3]] } -000000E370BE56F0: {[B2 Gradient[132 x 1]] } -000000E3735C0CF0: {[W2-SVD Value[132 x 512]] } -000000E3735C0ED0: {[W1-SVD Value[512 x 512]] } -000000E3735C1010: {[W0-SVD Value[512 x 363]] } -000000E3735C1150: {[LogOfPrior Value[132]] } -000000E3735C11F0: {[MVNormalizedFeatures Value[363 x *3]] } -000000E3735C1290: {[W0*features Value[512 x *3]] [W0-U Gradient[512 x 104]] } -000000E3735C1330: {[W0*features+B0 Value[512 x 1 x *3]] [W0-SVD Gradient[512 x 363]] [W1-U Gradient[512 x 128]] } -000000E3735C13D0: {[H1 Value[512 x 1 x *3]] [W0*features Gradient[512 x *3]] [W0-V Gradient[104 x 363]] } -000000E3735C1650: {[W0*features+B0 Gradient[512 x 1 x *3]] [W1*H1 Value[512 x 1 x *3]] [W1-V Gradient[128 x 512]] } -000000E3735C16F0: {[W2-U Value[132 x 32]] } -000000E3735C1DD0: {[W1-V Value[128 x 512]] } -000000E3735C27D0: {[EvalErrorPrediction Value[1]] } -000000E3735C2870: {[ScaledLogLikelihood Value[132 x 1 x *3]] } -000000E3735C29B0: {[W2-V Value[32 x 512]] } -000000E3735C2AF0: {[CrossEntropyWithSoftmax Value[1]] } -000000E37375E820: {[W0-V Value[104 x 363]] } -000000E37375EA00: {[MeanOfFeatures Value[363]] } -000000E37375EAA0: {[Prior Value[132]] } -000000E37375ED20: {[features Value[363 x *3]] } -000000E37375EE60: {[InvStdOfFeatures Value[363]] } -000000E37375F220: {[W0-U Value[512 x 104]] } -000000E37375F680: {[W1-U Value[512 x 128]] } -000000E37375F720: {[B0 Value[512 x 1]] } -000000E37375FA40: {[B2 Value[132 x 1]] } -000000E37375FAE0: {[B1 Value[512 x 1]] } -000000E37375FCC0: {[labels Value[132 x *3]] } +00000091DBD7C390: {[B2 Gradient[132 x 1]] } +00000091DBD7C930: {[CrossEntropyWithSoftmax Gradient[1]] } +00000091DBD7CA70: {[W2*H1 Gradient[132 x 1 x *3]] } +00000091DBD7CF70: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *3]] [HLast Gradient[132 x 1 x *3]] } +00000091DC2BD1A0: {[H1 Value[512 x 1 x *3]] [W0*features Gradient[512 x *3]] [W0-V Gradient[104 x 363]] } +00000091DC2BD240: {[W1*H1+B1 Value[512 x 1 x *3]] [W1-SVD Gradient[512 x 512]] [W2-U Gradient[132 x 32]] } +00000091DC2BD380: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *3]] [W1*H1+B1 Gradient[512 x 1 x *3]] [W2*H1 Value[132 x 1 x *3]] } +00000091DC2BD420: {[W1-SVD Value[512 x 512]] } +00000091DC2BD4C0: {[H2 Value[512 x 1 x *3]] [W1*H1 Gradient[512 x 1 x *3]] [W2-V Gradient[32 x 512]] } +00000091DC2BD600: {[HLast Value[132 x 1 x *3]] [W2-SVD Gradient[132 x 512]] } +00000091DC2BD7E0: {[MVNormalizedFeatures Value[363 x *3]] } +00000091DC2BD920: {[W2-V Value[32 x 512]] } +00000091DC2BDA60: 
{[ScaledLogLikelihood Value[132 x 1 x *3]] } +00000091DC2BDB00: {[W0*features+B0 Value[512 x 1 x *3]] [W0-SVD Gradient[512 x 363]] [W1-U Gradient[512 x 128]] } +00000091DC2BDE20: {[W0*features+B0 Gradient[512 x 1 x *3]] [W1*H1 Value[512 x 1 x *3]] [W1-V Gradient[128 x 512]] } +00000091DC2BE000: {[W2-U Value[132 x 32]] } +00000091DC2BE140: {[W0-SVD Value[512 x 363]] } +00000091DC2BE1E0: {[LogOfPrior Value[132]] } +00000091DC2BE280: {[W0*features Value[512 x *3]] [W0-U Gradient[512 x 104]] } +00000091DC2BEA00: {[W1-U Value[512 x 128]] } +00000091DC2BEBE0: {[W1-V Value[128 x 512]] } +00000091DC2BEDC0: {[EvalErrorPrediction Value[1]] } +00000091DC2BEF00: {[CrossEntropyWithSoftmax Value[1]] } +00000091DC2BEFA0: {[W2-SVD Value[132 x 512]] } +00000091E3539570: {[W0-V Value[104 x 363]] } +00000091E3539610: {[B2 Value[132 x 1]] } +00000091E3539750: {[B1 Value[512 x 1]] } +00000091E35399D0: {[B0 Value[512 x 1]] } +00000091E3539BB0: {[labels Value[132 x *3]] } +00000091E3539CF0: {[MeanOfFeatures Value[363]] } +00000091E353A0B0: {[Prior Value[132]] } +00000091E353A3D0: {[InvStdOfFeatures Value[363]] } +00000091E353ABF0: {[W0-U Value[512 x 104]] } +00000091E353AF10: {[features Value[363 x *3]] } -05/03/2016 14:28:03: No PreCompute nodes found, skipping PreCompute step. +05/03/2016 14:28:40: No PreCompute nodes found, skipping PreCompute step. -05/03/2016 14:28:03: Starting Epoch 1: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 14:28:40: Starting Epoch 1: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses +requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -05/03/2016 14:28:03: Starting minibatch loop. -05/03/2016 14:28:04: Epoch[ 1 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87722015 * 10240; EvalErrorPrediction = 0.51220703 * 10240; time = 0.2713s; samplesPerSecond = 37738.9 -05/03/2016 14:28:04: Epoch[ 1 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.79417076 * 10240; EvalErrorPrediction = 0.49912109 * 10240; time = 0.1235s; samplesPerSecond = 82932.4 -05/03/2016 14:28:04: Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.83569546 * 20480; EvalErrorPrediction = 0.50566406 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 9.7656251e-005; epochTime=0.460598s -05/03/2016 14:28:04: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn.1' +05/03/2016 14:28:41: Starting minibatch loop. 
+05/03/2016 14:28:41: Epoch[ 1 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.87722015 * 10240; EvalErrorPrediction = 0.51220703 * 10240; time = 0.1366s; samplesPerSecond = 74962.3 +05/03/2016 14:28:41: Epoch[ 1 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.79417076 * 10240; EvalErrorPrediction = 0.49912109 * 10240; time = 0.1233s; samplesPerSecond = 83068.3 +05/03/2016 14:28:41: Finished Epoch[ 1 of 2]: [Training] CrossEntropyWithSoftmax = 1.83569546 * 20480; EvalErrorPrediction = 0.50566406 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 9.7656251e-005; epochTime=0.414351s +05/03/2016 14:28:41: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn.1' -05/03/2016 14:28:04: Starting Epoch 2: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +05/03/2016 14:28:41: Starting Epoch 2: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples +minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 1, with 1 datapasses -05/03/2016 14:28:04: Starting minibatch loop. -05/03/2016 14:28:04: Epoch[ 2 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.81283226 * 10240; EvalErrorPrediction = 0.50488281 * 10240; time = 0.1262s; samplesPerSecond = 81148.1 -05/03/2016 14:28:04: Epoch[ 2 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.78207207 * 10240; EvalErrorPrediction = 0.50322266 * 10240; time = 0.1264s; samplesPerSecond = 80997.3 -05/03/2016 14:28:04: Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.79745216 * 20480; EvalErrorPrediction = 0.50405273 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 9.7656251e-005; epochTime=0.254584s -05/03/2016 14:28:04: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech\ExperimentalHtkmlfReader_SVD@release_gpu/models/cntkSpeech.svd.dnn' -05/03/2016 14:28:04: CNTKCommandTrainEnd: SVDTrain +05/03/2016 14:28:41: Starting minibatch loop. +05/03/2016 14:28:41: Epoch[ 2 of 2]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.81283226 * 10240; EvalErrorPrediction = 0.50488281 * 10240; time = 0.1251s; samplesPerSecond = 81869.6 +05/03/2016 14:28:41: Epoch[ 2 of 2]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.78207207 * 10240; EvalErrorPrediction = 0.50322266 * 10240; time = 0.1233s; samplesPerSecond = 83048.1 +05/03/2016 14:28:41: Finished Epoch[ 2 of 2]: [Training] CrossEntropyWithSoftmax = 1.79745216 * 20480; EvalErrorPrediction = 0.50405273 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 9.7656251e-005; epochTime=0.252496s +05/03/2016 14:28:41: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503142201.423154\Speech_SVD@release_gpu/models/cntkSpeech.svd.dnn' +05/03/2016 14:28:41: CNTKCommandTrainEnd: SVDTrain -05/03/2016 14:28:04: Action "train" complete. +05/03/2016 14:28:41: Action "train" complete. 
-05/03/2016 14:28:04: __COMPLETED__ \ No newline at end of file +05/03/2016 14:28:41: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/run-test b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/run-test index 69ec7918a..c837ce024 100755 --- a/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/run-test +++ b/Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/SVD/run-test @@ -5,5 +5,11 @@ OriginalTestDir=../../SVD ConfigDir=$TEST_DIR/$OriginalTestDir +(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) +if [ $? != 0 ]; then + echo Error: Baselines must match original test. Copy from $OriginalTestDir. + exit 1 +fi + # cntkrun cntkrun cntk.cntk 'reader=[readerType=ExperimentalHTKMLFReader] reader=[prefetch=true]' || exit $? From 2fe060ed9cc0301e4089e8f20df257ab67b66c4d Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Tue, 26 Apr 2016 18:14:50 +0200 Subject: [PATCH 25/51] Adding first implementation of the composite transformer --- .../Readers/ReaderLib/CompositeTransformer.h | 84 +++++++++++++++++++ Source/Readers/ReaderLib/ReaderLib.vcxproj | 1 + .../ReaderLib/ReaderLib.vcxproj.filters | 2 + Source/Readers/ReaderLib/Transformer.h | 27 ++++-- 4 files changed, 109 insertions(+), 5 deletions(-) create mode 100644 Source/Readers/ReaderLib/CompositeTransformer.h diff --git a/Source/Readers/ReaderLib/CompositeTransformer.h b/Source/Readers/ReaderLib/CompositeTransformer.h new file mode 100644 index 000000000..1ed19a2b2 --- /dev/null +++ b/Source/Readers/ReaderLib/CompositeTransformer.h @@ -0,0 +1,84 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +#pragma once + +#include + +#include "Transformer.h" + +namespace Microsoft { namespace MSR { namespace CNTK { + +struct Transformation +{ + SlimTransformerPtr m_transfromer; + size_t m_streamId; +}; + +class CompositeTransformer : public Transformer +{ +public: + CompositeTransformer(const std::vector& transformations) : m_transformations(transformations) + { + } + + // Initializes the transformer. + virtual void Initialize(TransformerPtr next, + const ConfigParameters &) override + { + m_next = next; + m_chainOfStreamDescriptions.reserve(m_transformations.size() + 1); + std::vector streams = m_next->GetStreamDescriptions(); + m_chainOfStreamDescriptions.push_back(streams); + for (auto& t : m_transformations) + { + streams[t.m_streamId] = t.m_transfromer->Transform(*streams[t.m_streamId]); + m_chainOfStreamDescriptions.push_back(streams); + } + } + + // Sets configuration for the current epoch. + virtual void StartEpoch(const EpochConfiguration &config) override + { + assert(m_next != nullptr); + m_next->StartEpoch(config); + } + + // Description of streams that the transformer provides. + virtual std::vector GetStreamDescriptions() const override + { + return m_chainOfStreamDescriptions.back(); + } + + // Gets next sequences up to a maximum count of samples. + // Sequences contains data for all streams. 
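+ // Each transformation is applied to its configured stream for every sequence returned by the upstream transformer; the per-sequence loop below runs in parallel via OpenMP.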
+ virtual Sequences GetNextSequences(size_t sampleCount) override + { + assert(m_next != nullptr); + Sequences sequences = m_next->GetNextSequences(sampleCount); + if (sequences.m_data.empty()) + { + return sequences; + } + +#pragma omp parallel for schedule(dynamic) + for (int j = 0; j < sequences.m_data.front().size(); ++j) + { + for (auto& t : m_transformations) + { + sequences.m_data[t.m_streamId][j] = t.m_transfromer->Transform(sequences.m_data[t.m_streamId][j]); + } + } + + return sequences; + } + +private: + TransformerPtr m_next; + std::vector m_transformations; + std::vector> m_chainOfStreamDescriptions; +}; + +}}} diff --git a/Source/Readers/ReaderLib/ReaderLib.vcxproj b/Source/Readers/ReaderLib/ReaderLib.vcxproj index e11bd7294..5dc2ceba8 100644 --- a/Source/Readers/ReaderLib/ReaderLib.vcxproj +++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj @@ -43,6 +43,7 @@ + diff --git a/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters b/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters index 0f94ecdfc..97336400b 100644 --- a/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters +++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters @@ -66,6 +66,8 @@ Utils + + Transformers diff --git a/Source/Readers/ReaderLib/Transformer.h b/Source/Readers/ReaderLib/Transformer.h index 667d774f4..fe4dbd1df 100644 --- a/Source/Readers/ReaderLib/Transformer.h +++ b/Source/Readers/ReaderLib/Transformer.h @@ -32,10 +32,6 @@ struct Sequences class Transformer; typedef std::shared_ptr TransformerPtr; -// Defines a data transformation interface. -// Transformers are responsible for doing custom transformation of sequences. -// For example for images, there could be scale, crop, or median transformation. -// TODO: Adopt to the C#/Java iterator pattern. class Transformer { public: @@ -58,4 +54,25 @@ public: { } }; -} } } + +// Defines a data transformation interface. +// Transformers are responsible for doing custom transformation of sequences. +// For example for images, there could be scale, crop, or median transformation. +class SlimTransformer +{ +public: + // Transformation of the stream. + virtual StreamDescriptionPtr Transform(const StreamDescription& inputStream) const = 0; + + // Transformation of the sequence. 
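+ // Called once per sequence; an implementation returns the transformed sequence data, which may wrap the input buffer or allocate a new one.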
+ virtual SequenceDataPtr Transform(SequenceDataPtr sequence) = 0; + + virtual ~SlimTransformer() + { + } +}; + +typedef std::shared_ptr SlimTransformerPtr; + + +}}} From 4cefc0e14142d128d389123a6d56ee057295f6a2 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Wed, 27 Apr 2016 17:12:17 +0200 Subject: [PATCH 26/51] First implementation of slim transformers --- .../Readers/ImageReader/ImageReader.vcxproj | 2 + .../ImageReader/ImageReader.vcxproj.filters | 2 + .../ImageReader/ImageSlimTransformers.cpp | 457 ++++++++++++++++++ .../ImageReader/ImageSlimTransformers.h | 144 ++++++ Source/Readers/ReaderLib/Transformer.h | 2 +- 5 files changed, 606 insertions(+), 1 deletion(-) create mode 100644 Source/Readers/ImageReader/ImageSlimTransformers.cpp create mode 100644 Source/Readers/ImageReader/ImageSlimTransformers.h diff --git a/Source/Readers/ImageReader/ImageReader.vcxproj b/Source/Readers/ImageReader/ImageReader.vcxproj index fbc5ca6fe..7bc1da3ae 100644 --- a/Source/Readers/ImageReader/ImageReader.vcxproj +++ b/Source/Readers/ImageReader/ImageReader.vcxproj @@ -121,6 +121,7 @@ if "$(UseZip)" == "true" if exist "$(ZLIB_PATH)\bin\zlib1.dll" (xcopy /I /D /Y " + @@ -133,6 +134,7 @@ if "$(UseZip)" == "true" if exist "$(ZLIB_PATH)\bin\zlib1.dll" (xcopy /I /D /Y " true + Create diff --git a/Source/Readers/ImageReader/ImageReader.vcxproj.filters b/Source/Readers/ImageReader/ImageReader.vcxproj.filters index e38d3f31b..d2b93926b 100644 --- a/Source/Readers/ImageReader/ImageReader.vcxproj.filters +++ b/Source/Readers/ImageReader/ImageReader.vcxproj.filters @@ -21,6 +21,7 @@ Common + @@ -39,6 +40,7 @@ + diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.cpp b/Source/Readers/ImageReader/ImageSlimTransformers.cpp new file mode 100644 index 000000000..c227e2768 --- /dev/null +++ b/Source/Readers/ImageReader/ImageSlimTransformers.cpp @@ -0,0 +1,457 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +#include "stdafx.h" +#include +#include +#include +#include "ImageSlimTransformers.h" +#include "Config.h" +#include "ConcStack.h" +#include "ImageConfigHelper.h" +#include "StringUtil.h" +#include "ElementTypeUtils.h" + +namespace Microsoft { +namespace MSR { +namespace CNTK { + +struct ImageSequenceData : DenseSequenceData +{ + cv::Mat m_image; + // In case we do not copy data - we have to preserve the original sequence. 
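+ // (keeping this reference alive ensures the source buffer backing m_image stays valid when no copy was made)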
+ SequenceDataPtr m_original; +}; + +SlimImageTransformerBase::SlimImageTransformerBase(const ConfigParameters& cfg) : m_imageElementType(0) +{ + m_seed = cfg(L"seed", 0u); +} + +StreamDescription SlimImageTransformerBase::Transform(const StreamDescription& inputStream) +{ + m_inputStream = inputStream; + m_outputStream = m_inputStream; + + if (m_inputStream.m_storageType != StorageType::dense) + { + LogicError("ImageTransformerBase supports only dense input streams."); + } + + if (m_inputStream.m_elementType == ElementType::tdouble) + { + m_imageElementType = CV_64F; + } + else if (m_inputStream.m_elementType == ElementType::tfloat) + { + m_imageElementType = CV_32F; + } + else + { + RuntimeError("Unsupported type"); + } + + return m_outputStream; +} + +SequenceDataPtr SlimImageTransformerBase::Transform(SequenceDataPtr sequence) +{ + auto inputSequence = static_cast(*sequence); + + ImageDimensions dimensions(*inputSequence.m_sampleLayout, HWC); + int columns = static_cast(dimensions.m_width); + int rows = static_cast(dimensions.m_height); + int channels = static_cast(dimensions.m_numChannels); + + auto result = std::make_shared(); + int type = CV_MAKETYPE(m_imageElementType, channels); + cv::Mat buffer = cv::Mat(rows, columns, type, inputSequence.m_data); + Apply(sequence->m_id, buffer); + if (!buffer.isContinuous()) + { + buffer = buffer.clone(); + } + else + { + result->m_original = sequence; + } + assert(buffer.isContinuous()); + result->m_image = buffer; + result->m_data = buffer.ptr(); + result->m_numberOfSamples = inputSequence.m_numberOfSamples; + + ImageDimensions outputDimensions(buffer.cols, buffer.rows, buffer.channels()); + result->m_sampleLayout = std::make_shared(outputDimensions.AsTensorShape(HWC)); + return result; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +SlimCropTransformer::SlimCropTransformer(const ConfigParameters& config) : SlimImageTransformerBase(config) +{ + m_cropType = ParseCropType(config(L"cropType", "")); + + floatargvector cropRatio = config(L"cropRatio", "1.0"); + m_cropRatioMin = cropRatio[0]; + m_cropRatioMax = cropRatio[1]; + + if (!(0 < m_cropRatioMin && m_cropRatioMin <= 1.0) || + !(0 < m_cropRatioMax && m_cropRatioMax <= 1.0) || + m_cropRatioMin > m_cropRatioMax) + { + RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must " + "<= cropMax"); + } + + m_jitterType = ParseJitterType(config(L"jitterType", "")); + + if (!config.ExistsCurrent(L"hflip")) + { + m_hFlip = m_cropType == CropType::Random; + } + else + { + m_hFlip = config(L"hflip"); + } +} + +void SlimCropTransformer::Apply(size_t id, cv::Mat &mat) +{ + auto seed = GetSeed(); + auto rng = m_rngs.pop_or_create( + [seed]() + { + return std::make_unique(seed); + }); + + double ratio = 1; + switch (m_jitterType) + { + case RatioJitterType::None: + ratio = m_cropRatioMin; + break; + case RatioJitterType::UniRatio: + if (m_cropRatioMin == m_cropRatioMax) + { + ratio = m_cropRatioMin; + } + else + { + ratio = UniRealT(m_cropRatioMin, m_cropRatioMax)(*rng); + assert(m_cropRatioMin <= ratio && ratio < m_cropRatioMax); + } + break; + default: + RuntimeError("Jitter type currently not implemented."); + } + + int viewIndex = m_cropType == CropType::MultiView10 ? 
(int)(id % 10) : 0; + + mat = mat(GetCropRect(m_cropType, viewIndex, mat.rows, mat.cols, ratio, *rng)); + if ((m_hFlip && std::bernoulli_distribution()(*rng)) || + viewIndex >= 5) + { + cv::flip(mat, mat, 1); + } + + m_rngs.push(std::move(rng)); +} + +SlimCropTransformer::CropType SlimCropTransformer::ParseCropType(const std::string &src) +{ + if (src.empty() || AreEqualIgnoreCase(src, "center")) + { + return CropType::Center; + } + + if (AreEqualIgnoreCase(src, "random")) + { + return CropType::Random; + } + + if (AreEqualIgnoreCase(src, "multiview10")) + { + return CropType::MultiView10; + } + + RuntimeError("Invalid crop type: %s.", src.c_str()); +} + +SlimCropTransformer::RatioJitterType SlimCropTransformer::ParseJitterType(const std::string &src) +{ + if (src.empty() || AreEqualIgnoreCase(src, "none")) + { + return RatioJitterType::None; + } + + if (AreEqualIgnoreCase(src, "uniratio")) + { + return RatioJitterType::UniRatio; + } + + if (AreEqualIgnoreCase(src, "unilength")) + { + return RatioJitterType::UniLength; + } + + if (AreEqualIgnoreCase(src, "uniarea")) + { + return RatioJitterType::UniArea; + } + + RuntimeError("Invalid jitter type: %s.", src.c_str()); +} + +cv::Rect SlimCropTransformer::GetCropRect(CropType type, int viewIndex, int crow, int ccol, + double cropRatio, std::mt19937 &rng) +{ + assert(crow > 0); + assert(ccol > 0); + assert(0 < cropRatio && cropRatio <= 1.0); + + int cropSize = static_cast(std::min(crow, ccol) * cropRatio); + int xOff = -1; + int yOff = -1; + switch (type) + { + case CropType::Center: + assert(viewIndex == 0); + xOff = (ccol - cropSize) / 2; + yOff = (crow - cropSize) / 2; + break; + case CropType::Random: + assert(viewIndex == 0); + xOff = UniIntT(0, ccol - cropSize)(rng); + yOff = UniIntT(0, crow - cropSize)(rng); + break; + case CropType::MultiView10: + { + assert(0 <= viewIndex && viewIndex < 10); + // 0 - 4: 4 corners + center crop. 5 - 9: same, but with a flip. + int isubView = viewIndex % 5; + switch (isubView) + { + // top-left + case 0: + xOff = 0; + yOff = 0; + break; + // top-right + case 1: + xOff = ccol - cropSize; + yOff = 0; + break; + // bottom-left + case 2: + xOff = 0; + yOff = crow - cropSize; + break; + // bottom-right + case 3: + xOff = ccol - cropSize; + yOff = crow - cropSize; + break; + // center + case 4: + xOff = (ccol - cropSize) / 2; + yOff = (crow - cropSize) / 2; + break; + } + break; + } + default: + assert(false); + } + + assert(0 <= xOff && xOff <= ccol - cropSize); + assert(0 <= yOff && yOff <= crow - cropSize); + return cv::Rect(xOff, yOff, cropSize, cropSize); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +SlimScaleTransformer::SlimScaleTransformer(const ConfigParameters& config) : SlimImageTransformerBase(config) +{ + m_interpMap.emplace("nearest", cv::INTER_NEAREST); + m_interpMap.emplace("linear", cv::INTER_LINEAR); + m_interpMap.emplace("cubic", cv::INTER_CUBIC); + m_interpMap.emplace("lanczos", cv::INTER_LANCZOS4); + + m_imgWidth = config(L"width"); + m_imgHeight = config(L"height"); + m_imgChannels = config(L"channels"); + + size_t cfeat = m_imgWidth * m_imgHeight * m_imgChannels; + if (cfeat == 0 || cfeat > std::numeric_limits().max() / 2) + RuntimeError("Invalid image dimensions."); + + m_interp.clear(); + std::stringstream ss{config(L"interpolations", "")}; + for (std::string token = ""; std::getline(ss, token, ':');) + { + // Explicit cast required for GCC. 
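+ // (std::tolower is overloaded; the function-pointer cast below selects the single-argument int(int) overload so std::transform can resolve it)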
+ std::transform(token.begin(), token.end(), token.begin(), + (int(*) (int)) std::tolower); + StrToIntMapT::const_iterator res = m_interpMap.find(token); + if (res != m_interpMap.end()) + m_interp.push_back((*res).second); + } + + if (m_interp.size() == 0) + m_interp.push_back(cv::INTER_LINEAR); +} + +void SlimScaleTransformer::Apply(size_t id, cv::Mat &mat) +{ + UNUSED(id); + + // If matrix has not been converted to the right type, do it now as rescaling + // requires floating point type. + if (mat.type() != CV_MAKETYPE(m_imageElementType, m_imgChannels)) + { + mat.convertTo(mat, m_imageElementType); + } + + auto seed = GetSeed(); + auto rng = m_rngs.pop_or_create( + [seed]() + { + return std::make_unique(seed); + }); + + + auto index = UniIntT(0, static_cast(m_interp.size()) - 1)(*rng); + assert(m_interp.size() > 0); + cv::resize( + mat, mat, + cv::Size(static_cast(m_imgWidth), static_cast(m_imgHeight)), 0, + 0, m_interp[index]); + + m_rngs.push(std::move(rng)); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +SlimMeanTransformer::SlimMeanTransformer(const ConfigParameters& config) : SlimImageTransformerBase(config) +{ + std::wstring meanFile = config(L"meanFile", L""); + if (meanFile.empty()) + m_meanImg.release(); + else + { + cv::FileStorage fs; + // REVIEW alexeyk: this sort of defeats the purpose of using wstring at + // all... [fseide] no, only OpenCV has this problem. + fs.open(msra::strfun::utf8(meanFile).c_str(), cv::FileStorage::READ); + if (!fs.isOpened()) + RuntimeError("Could not open file: %ls", meanFile.c_str()); + fs["MeanImg"] >> m_meanImg; + int cchan; + fs["Channel"] >> cchan; + int crow; + fs["Row"] >> crow; + int ccol; + fs["Col"] >> ccol; + if (cchan * crow * ccol != + m_meanImg.channels() * m_meanImg.rows * m_meanImg.cols) + RuntimeError("Invalid data in file: %ls", meanFile.c_str()); + fs.release(); + m_meanImg = m_meanImg.reshape(cchan, crow); + } +} + +void SlimMeanTransformer::Apply(size_t id, cv::Mat &mat) +{ + UNUSED(id); + assert(m_meanImg.size() == cv::Size(0, 0) || + (m_meanImg.size() == mat.size() && + m_meanImg.channels() == mat.channels())); + + // REVIEW alexeyk: check type conversion (float/double). + if (m_meanImg.size() == mat.size()) + { + mat = mat - m_meanImg; + } +} + +SlimTransposeTransformer::SlimTransposeTransformer(const ConfigParameters&) +{ +} + +StreamDescription SlimTransposeTransformer::Transform(const StreamDescription& inputStream) +{ + m_inputStream = inputStream; + + ImageDimensions dimensions(*m_inputStream.m_sampleLayout, HWC); + + // Changing from NHWC to NCHW + m_outputStream = m_inputStream; + m_outputStream.m_sampleLayout = std::make_shared(dimensions.AsTensorShape(CHW)); + + if (m_inputStream.m_storageType != StorageType::dense) + { + LogicError("Transpose transformer supports only dense streams."); + } + + return m_outputStream; +} + +// Transformation of the sequence. +SequenceDataPtr SlimTransposeTransformer::Transform(SequenceDataPtr sequence) +{ + if (m_inputStream.m_elementType == ElementType::tdouble) + { + return TypedApply(sequence); + } + + if (m_inputStream.m_elementType == ElementType::tfloat) + { + return TypedApply(sequence); + } + + RuntimeError("Unsupported type"); +} + +// The class represents a sequence that owns an internal data buffer. +// Passed from the TransposeTransformer. +// TODO: Trasposition potentially could be done in place. 
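+// (the typed transpose below writes the CHW result into m_buffer, so the output never aliases the input sequence's HWC data)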
+struct DenseSequenceWithBuffer : DenseSequenceData +{ + std::vector m_buffer; +}; + +template +SequenceDataPtr SlimTransposeTransformer::TypedApply(SequenceDataPtr sequence) +{ + auto inputSequence = static_cast(*sequence); + assert(inputSequence.m_numberOfSamples == 1); + + size_t count = m_inputStream.m_sampleLayout->GetNumElements() * GetSizeByType(m_inputStream.m_elementType); + + auto result = std::make_shared(); + result->m_buffer.resize(count); + + ImageDimensions dimensions(*m_inputStream.m_sampleLayout, ImageLayoutKind::HWC); + size_t rowCount = dimensions.m_height * dimensions.m_width; + size_t channelCount = dimensions.m_numChannels; + + auto src = reinterpret_cast(inputSequence.m_data); + auto dst = reinterpret_cast(result->m_buffer.data()); + + for (size_t irow = 0; irow < rowCount; irow++) + { + for (size_t icol = 0; icol < channelCount; icol++) + { + dst[icol * rowCount + irow] = src[irow * channelCount + icol]; + } + } + + result->m_sampleLayout = m_outputStream.m_sampleLayout; + result->m_data = result->m_buffer.data(); + result->m_numberOfSamples = inputSequence.m_numberOfSamples; + return result; +} + +}}} diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.h b/Source/Readers/ImageReader/ImageSlimTransformers.h new file mode 100644 index 000000000..0242ca28b --- /dev/null +++ b/Source/Readers/ImageReader/ImageSlimTransformers.h @@ -0,0 +1,144 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +#pragma once + +#include +#include +#include + +#include "Transformer.h" +#include "ConcStack.h" + +namespace Microsoft { namespace MSR { namespace CNTK { + +class ConfigParameters; + +// Base class for image transformations based on OpenCV +// that helps to wrap the sequences into OpenCV::Mat class. +class SlimImageTransformerBase : public SlimTransformer +{ +public: + explicit SlimImageTransformerBase(const ConfigParameters& config); + + // Transformation of the stream. + StreamDescription Transform(const StreamDescription& inputStream) override; + + // Transformation of the sequence. + SequenceDataPtr Transform(SequenceDataPtr sequence) override; + +protected: + // Seed getter. + unsigned int GetSeed() const + { + return m_seed; + } + + using Base = SlimTransformer; + using UniRealT = std::uniform_real_distribution; + using UniIntT = std::uniform_int_distribution; + + // The only function that should be redefined by the inherited classes. + virtual void Apply(size_t id, cv::Mat &from) = 0; + +protected: + StreamDescription m_inputStream; + StreamDescription m_outputStream; + + unsigned int m_seed; + int m_imageElementType; + conc_stack> m_rngs; +}; + +// Crop transformation of the image. +// Can work on images of any size. 
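+// The cropType config value selects the behavior: center, random, or multiview10 (parsed by ParseCropType in the .cpp).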
+// Crop transformation of the image.
+// Can work on images of any size.
+class SlimCropTransformer : public SlimImageTransformerBase
+{
+public:
+    explicit SlimCropTransformer(const ConfigParameters& config);
+
+protected:
+    virtual void Apply(size_t id, cv::Mat &mat) override;
+
+private:
+    enum class CropType
+    {
+        Center = 0,
+        Random = 1,
+        MultiView10 = 2
+    };
+    enum class RatioJitterType
+    {
+        None = 0,
+        UniRatio = 1,
+        UniLength = 2,
+        UniArea = 3
+    };
+
+    CropType ParseCropType(const std::string &src);
+    RatioJitterType ParseJitterType(const std::string &src);
+    cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio,
+                         std::mt19937 &rng);
+
+    CropType m_cropType;
+    double m_cropRatioMin;
+    double m_cropRatioMax;
+    RatioJitterType m_jitterType;
+    bool m_hFlip;
+};
+
+// Scale transformation of the image.
+// Scales the image to the dimensions requested by the network.
+class SlimScaleTransformer : public SlimImageTransformerBase
+{
+public:
+    explicit SlimScaleTransformer(const ConfigParameters& config);
+
+private:
+    virtual void Apply(size_t id, cv::Mat &mat) override;
+
+    using StrToIntMapT = std::unordered_map<std::string, int>;
+    StrToIntMapT m_interpMap;
+    std::vector<int> m_interp;
+
+    size_t m_imgWidth;
+    size_t m_imgHeight;
+    size_t m_imgChannels;
+};
+
+// Mean transformation.
+class SlimMeanTransformer : public SlimImageTransformerBase
+{
+public:
+    explicit SlimMeanTransformer(const ConfigParameters& config);
+
+private:
+    virtual void Apply(size_t id, cv::Mat &mat) override;
+
+    cv::Mat m_meanImg;
+};
+
+// Transpose transformation from HWC to CHW.
+class SlimTransposeTransformer : public SlimTransformerPtr
+{
+public:
+    explicit SlimTransposeTransformer(const ConfigParameters& config);
+
+    // Transformation of the stream.
+    StreamDescription Transform(const StreamDescription& inputStream);
+
+    // Transformation of the sequence.
+    SequenceDataPtr Transform(SequenceDataPtr sequence);
+
+
+private:
+    template <class TElement>
+    SequenceDataPtr TypedApply(SequenceDataPtr inputSequence);
+
+    StreamDescription m_inputStream;
+    StreamDescription m_outputStream;
+};
+
+}}}
diff --git a/Source/Readers/ReaderLib/Transformer.h b/Source/Readers/ReaderLib/Transformer.h
index fe4dbd1df..342c1ab63 100644
--- a/Source/Readers/ReaderLib/Transformer.h
+++ b/Source/Readers/ReaderLib/Transformer.h
@@ -62,7 +62,7 @@ class SlimTransformer
 {
 public:
     // Transformation of the stream.
-    virtual StreamDescriptionPtr Transform(const StreamDescription& inputStream) const = 0;
+    virtual StreamDescription Transform(const StreamDescription& inputStream) = 0;
 
     // Transformation of the sequence.
virtual SequenceDataPtr Transform(SequenceDataPtr sequence) = 0; From b73c3cce5d852ab11cee8c006b62e9e6fb1e74b9 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Wed, 27 Apr 2016 17:13:26 +0200 Subject: [PATCH 27/51] Simple renaming --- Source/Readers/ImageReader/ImageSlimTransformers.cpp | 6 +++--- Source/Readers/ImageReader/ImageSlimTransformers.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.cpp b/Source/Readers/ImageReader/ImageSlimTransformers.cpp index c227e2768..7fc369ee1 100644 --- a/Source/Readers/ImageReader/ImageSlimTransformers.cpp +++ b/Source/Readers/ImageReader/ImageSlimTransformers.cpp @@ -403,12 +403,12 @@ SequenceDataPtr SlimTransposeTransformer::Transform(SequenceDataPtr sequence) { if (m_inputStream.m_elementType == ElementType::tdouble) { - return TypedApply(sequence); + return TypedTransform(sequence); } if (m_inputStream.m_elementType == ElementType::tfloat) { - return TypedApply(sequence); + return TypedTransform(sequence); } RuntimeError("Unsupported type"); @@ -423,7 +423,7 @@ struct DenseSequenceWithBuffer : DenseSequenceData }; template -SequenceDataPtr SlimTransposeTransformer::TypedApply(SequenceDataPtr sequence) +SequenceDataPtr SlimTransposeTransformer::TypedTransform(SequenceDataPtr sequence) { auto inputSequence = static_cast(*sequence); assert(inputSequence.m_numberOfSamples == 1); diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.h b/Source/Readers/ImageReader/ImageSlimTransformers.h index 0242ca28b..790408af3 100644 --- a/Source/Readers/ImageReader/ImageSlimTransformers.h +++ b/Source/Readers/ImageReader/ImageSlimTransformers.h @@ -135,7 +135,7 @@ public: private: template - SequenceDataPtr TypedApply(SequenceDataPtr inputSequence); + SequenceDataPtr TypedTransform(SequenceDataPtr inputSequence); StreamDescription m_inputStream; StreamDescription m_outputStream; From a197259ee846da78729f0bd350eb9a4ee9dbab82 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Thu, 28 Apr 2016 10:45:18 +0200 Subject: [PATCH 28/51] Adding composite transformer --- Source/Readers/ImageReader/ImageReader.cpp | 32 ++++++++----------- .../ImageReader/ImageSlimTransformers.h | 2 +- .../Readers/ReaderLib/CompositeTransformer.h | 2 +- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/Source/Readers/ImageReader/ImageReader.cpp b/Source/Readers/ImageReader/ImageReader.cpp index 327cc5915..47af5d373 100644 --- a/Source/Readers/ImageReader/ImageReader.cpp +++ b/Source/Readers/ImageReader/ImageReader.cpp @@ -7,12 +7,13 @@ #include "ImageReader.h" #include "Config.h" #include "ImageConfigHelper.h" -#include "ImageTransformers.h" #include "BlockRandomizer.h" #include "NoRandomizer.h" #include "ImageDataDeserializer.h" #include "FramePacker.h" +#include "CompositeTransformer.h" #include +#include "ImageSlimTransformers.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -53,29 +54,24 @@ ImageReader::ImageReader(MemoryProviderPtr provider, randomizer->Initialize(nullptr, config); - auto cropper = std::make_shared(); - cropper->Initialize(randomizer, config); + size_t featureStreamId = configHelper.GetFeatureStreamId(); + ConfigParameters featureStream = config(m_streams[featureStreamId]->m_name); - auto scaler = std::make_shared(); - scaler->Initialize(cropper, config); + // Create transformations. 
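The composite approach introduced here replaces the old chain of Initialize(previous, config) calls: each transformer is registered as a Transformation bound to one stream, and the composite transformer walks that flat list in order (see the push_backs just below). A simplified sketch of the composition pattern with stand-in types (the real CompositeTransformer in this patch additionally tracks the chain of stream descriptions):

    #include <memory>
    #include <vector>

    // Stand-ins for the reader types (hypothetical, simplified).
    struct SequenceData { };
    using SequenceDataPtr = std::shared_ptr<SequenceData>;

    struct Transformer
    {
        virtual ~Transformer() = default;
        virtual SequenceDataPtr Transform(SequenceDataPtr sequence) = 0;
    };

    // A transformer bound to the single stream it should run on.
    struct Transformation
    {
        std::shared_ptr<Transformer> m_transformer;
        size_t m_streamId;
    };

    // Minibatch data indexed as data[streamId][sequenceIndex].
    using Sequences = std::vector<std::vector<SequenceDataPtr>>;

    // Walk the flat list of transformations in order; each entry touches only its own stream.
    void ApplyTransformations(const std::vector<Transformation>& transformations, Sequences& data)
    {
        for (const auto& t : transformations)
            for (auto& sequence : data[t.m_streamId])
                sequence = t.m_transformer->Transform(sequence);
    }
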
+ std::vector transformations; + transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); - auto color = std::make_shared(); - color->Initialize(scaler, config); - - auto intensity = std::make_shared(); - intensity->Initialize(color, config); - - auto mean = std::make_shared(); - mean->Initialize(intensity, config); - - TransformerPtr last = mean; if (configHelper.GetDataFormat() == CHW) { - last = std::make_shared(); - last->Initialize(mean, config); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); } - m_transformer = last; + m_transformer = std::make_shared(transformations); + m_transformer->Initialize(randomizer, config); m_packer = std::make_shared( m_provider, diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.h b/Source/Readers/ImageReader/ImageSlimTransformers.h index 790408af3..b69c04b70 100644 --- a/Source/Readers/ImageReader/ImageSlimTransformers.h +++ b/Source/Readers/ImageReader/ImageSlimTransformers.h @@ -121,7 +121,7 @@ private: }; // Transpose transformation from HWC to CHW. -class SlimTransposeTransformer : public SlimTransformerPtr +class SlimTransposeTransformer : public SlimTransformer { public: explicit SlimTransposeTransformer(const ConfigParameters& config); diff --git a/Source/Readers/ReaderLib/CompositeTransformer.h b/Source/Readers/ReaderLib/CompositeTransformer.h index 1ed19a2b2..7072ca237 100644 --- a/Source/Readers/ReaderLib/CompositeTransformer.h +++ b/Source/Readers/ReaderLib/CompositeTransformer.h @@ -34,7 +34,7 @@ public: m_chainOfStreamDescriptions.push_back(streams); for (auto& t : m_transformations) { - streams[t.m_streamId] = t.m_transfromer->Transform(*streams[t.m_streamId]); + streams[t.m_streamId] = std::make_shared(t.m_transfromer->Transform(*streams[t.m_streamId])); m_chainOfStreamDescriptions.push_back(streams); } } From fc2b4c64fbdca49950dcedfbb69e79e7759e6e9b Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Mon, 2 May 2016 14:08:21 +0200 Subject: [PATCH 29/51] Fixing tests --- .../ImageReader/ImageSlimTransformers.cpp | 185 ++++++++++++++++++ .../ImageReader/ImageSlimTransformers.h | 61 +++++- .../Readers/ReaderLib/CompositeTransformer.h | 4 + Source/Readers/ReaderLib/Transformer.h | 3 + 4 files changed, 250 insertions(+), 3 deletions(-) diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.cpp b/Source/Readers/ImageReader/ImageSlimTransformers.cpp index 7fc369ee1..24c2c774f 100644 --- a/Source/Readers/ImageReader/ImageSlimTransformers.cpp +++ b/Source/Readers/ImageReader/ImageSlimTransformers.cpp @@ -454,4 +454,189 @@ SequenceDataPtr SlimTransposeTransformer::TypedTransform(SequenceDataPtr sequenc return result; } +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +SlimIntensityTransformer::SlimIntensityTransformer(const ConfigParameters &config) : SlimImageTransformerBase(config) +{ + m_stdDev = config(L"intensityStdDev", ConfigParameters::Array(doubleargvector(vector{0.0}))); + std::wstring intFile = config(L"intensityFile", L""); + if (intFile.empty()) + { + 
m_eigVal.release(); + m_eigVec.release(); + } + else + { + cv::FileStorage fs; + fs.open(msra::strfun::utf8(intFile).c_str(), cv::FileStorage::READ); + if (!fs.isOpened()) + RuntimeError("Could not open file: %ls", intFile.c_str()); + fs["EigVal"] >> m_eigVal; + if (m_eigVal.rows != 1 || m_eigVal.cols != 3 || m_eigVal.channels() != 1) + RuntimeError("Invalid EigVal data in file: %ls", intFile.c_str()); + fs["EigVec"] >> m_eigVec; + if (m_eigVec.rows != 3 || m_eigVec.cols != 3 || m_eigVec.channels() != 1) + RuntimeError("Invalid EigVec data in file: %ls", intFile.c_str()); + fs.release(); + } +} + +void SlimIntensityTransformer::StartEpoch(const EpochConfiguration &config) +{ + m_curStdDev = m_stdDev[config.m_epochIndex]; +} + +void SlimIntensityTransformer::Apply(size_t id, cv::Mat &mat) +{ + UNUSED(id); + + if (m_eigVal.empty() || m_eigVec.empty() || m_curStdDev == 0) + return; + + if (mat.type() == CV_64FC(mat.channels())) + Apply(mat); + else if (mat.type() == CV_32FC(mat.channels())) + Apply(mat); + else + RuntimeError("Unsupported type"); +} + +template +void SlimIntensityTransformer::Apply(cv::Mat &mat) +{ + auto seed = GetSeed(); + auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); + + // Using single precision as EigVal and EigVec matrices are single precision. + std::normal_distribution d(0, (float)m_curStdDev); + cv::Mat alphas(1, 3, CV_32FC1); + assert(m_eigVal.rows == 1 && m_eigVec.cols == 3); + alphas.at(0) = d(*rng) * m_eigVal.at(0); + alphas.at(1) = d(*rng) * m_eigVal.at(1); + alphas.at(2) = d(*rng) * m_eigVal.at(2); + m_rngs.push(std::move(rng)); + + assert(m_eigVec.rows == 3 && m_eigVec.cols == 3); + + cv::Mat shifts = m_eigVec * alphas.t(); + + // For multi-channel images data is in BGR format. + size_t cdst = mat.rows * mat.cols * mat.channels(); + ElemType* pdstBase = reinterpret_cast(mat.data); + for (ElemType* pdst = pdstBase; pdst < pdstBase + cdst;) + { + for (int c = 0; c < mat.channels(); c++) + { + float shift = shifts.at(mat.channels() - c - 1); + *pdst = std::min(std::max(*pdst + shift, (ElemType)0), (ElemType)255); + pdst++; + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +SlimColorTransformer::SlimColorTransformer(const ConfigParameters &config) : SlimImageTransformerBase(config) +{ + m_brightnessRadius = config(L"brightnessRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); + m_contrastRadius = config(L"contrastRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); + m_saturationRadius = config(L"saturationRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); +} + +void SlimColorTransformer::StartEpoch(const EpochConfiguration &config) +{ + m_curBrightnessRadius = m_brightnessRadius[config.m_epochIndex]; + if (!(0 <= m_curBrightnessRadius && m_curBrightnessRadius <= 1.0)) + InvalidArgument("brightnessRadius must be >= 0.0 and <= 1.0"); + + m_curContrastRadius = m_contrastRadius[config.m_epochIndex]; + if (!(0 <= m_curContrastRadius && m_curContrastRadius <= 1.0)) + InvalidArgument("contrastRadius must be >= 0.0 and <= 1.0"); + + m_curSaturationRadius = m_saturationRadius[config.m_epochIndex]; + if (!(0 <= m_curSaturationRadius && m_curSaturationRadius <= 1.0)) + InvalidArgument("saturationRadius must be >= 0.0 and <= 1.0"); +} + +void SlimColorTransformer::Apply(size_t id, cv::Mat &mat) +{ + UNUSED(id); + + if (m_curBrightnessRadius == 0 && m_curContrastRadius == 0 && m_curSaturationRadius 
== 0) + return; + + if (mat.type() == CV_64FC(mat.channels())) + Apply(mat); + else if (mat.type() == CV_32FC(mat.channels())) + Apply(mat); + else + RuntimeError("Unsupported type"); +} + +template +void SlimColorTransformer::Apply(cv::Mat &mat) +{ + auto seed = GetSeed(); + auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); + + if (m_curBrightnessRadius > 0 || m_curContrastRadius > 0) + { + // To change brightness and/or contrast the following standard transformation is used: + // Xij = alpha * Xij + beta, where + // alpha is a contrast adjustment and beta - brightness adjustment. + ElemType beta = 0; + if (m_curBrightnessRadius > 0) + { + UniRealT d(-m_curBrightnessRadius, m_curBrightnessRadius); + // Compute mean value of the image. + cv::Scalar imgMean = cv::sum(cv::sum(mat)); + // Compute beta as a fraction of the mean. + beta = (ElemType)(d(*rng) * imgMean[0] / (mat.rows * mat.cols * mat.channels())); + } + + ElemType alpha = 1; + if (m_curContrastRadius > 0) + { + UniRealT d(-m_curContrastRadius, m_curContrastRadius); + alpha = (ElemType)(1 + d(*rng)); + } + + // Could potentially use mat.convertTo(mat, -1, alpha, beta) + // but it does not do range checking for single/double precision matrix. saturate_cast won't work either. + size_t count = mat.rows * mat.cols * mat.channels(); + ElemType* pbase = reinterpret_cast(mat.data); + for (ElemType* p = pbase; p < pbase + count; p++) + { + *p = std::min(std::max(*p * alpha + beta, (ElemType)0), (ElemType)255); + } + } + + if (m_curSaturationRadius > 0 && mat.channels() == 3) + { + UniRealT d(-m_curSaturationRadius, m_curSaturationRadius); + double ratio = 1.0 + d(*rng); + assert(0 <= ratio && ratio <= 2); + + auto hsv = m_hsvTemp.pop_or_create([]() { return std::make_unique(); }); + + // To change saturation, we need to convert the image to HSV format first, + // the change S channgel and convert the image back to BGR format. + cv::cvtColor(mat, *hsv, CV_BGR2HSV); + assert(hsv->rows == mat.rows && hsv->cols == mat.cols); + size_t count = hsv->rows * hsv->cols * mat.channels(); + ElemType* phsvBase = reinterpret_cast(hsv->data); + for (ElemType* phsv = phsvBase; phsv < phsvBase + count; phsv += 3) + { + const int HsvIndex = 1; + phsv[HsvIndex] = std::min((ElemType)(phsv[HsvIndex] * ratio), (ElemType)1); + } + cv::cvtColor(*hsv, mat, CV_HSV2BGR); + + m_hsvTemp.push(std::move(hsv)); + } + + m_rngs.push(std::move(rng)); +} + + }}} diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.h b/Source/Readers/ImageReader/ImageSlimTransformers.h index b69c04b70..22c0c9729 100644 --- a/Source/Readers/ImageReader/ImageSlimTransformers.h +++ b/Source/Readers/ImageReader/ImageSlimTransformers.h @@ -11,6 +11,7 @@ #include "Transformer.h" #include "ConcStack.h" +#include "Config.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -23,6 +24,8 @@ class SlimImageTransformerBase : public SlimTransformer public: explicit SlimImageTransformerBase(const ConfigParameters& config); + void StartEpoch(const EpochConfiguration&) override {} + // Transformation of the stream. StreamDescription Transform(const StreamDescription& inputStream) override; @@ -126,12 +129,13 @@ class SlimTransposeTransformer : public SlimTransformer public: explicit SlimTransposeTransformer(const ConfigParameters& config); + void StartEpoch(const EpochConfiguration&) override {} + // Transformation of the stream. 
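The brightness/contrast branch above reduces to the affine map Xij = alpha * Xij + beta: alpha is drawn around 1 within contrastRadius, and beta is a random fraction (within brightnessRadius) of the image mean. A small sketch of just that parameter draw, with made-up example values (not taken from the patch):

    #include <algorithm>
    #include <cstdio>
    #include <random>

    int main()
    {
        std::mt19937 rng(42);
        double brightnessRadius = 0.2;   // example config value
        double contrastRadius   = 0.3;   // example config value
        double imageMean        = 128.0; // mean over all pixel values of the image

        // beta: brightness shift, a random fraction of the image mean.
        std::uniform_real_distribution<double> db(-brightnessRadius, brightnessRadius);
        double beta = db(rng) * imageMean;   // in [-25.6, 25.6] for these values

        // alpha: contrast scale around 1.
        std::uniform_real_distribution<double> dc(-contrastRadius, contrastRadius);
        double alpha = 1.0 + dc(rng);        // in [0.7, 1.3] for these values

        // Each pixel is then mapped as x -> clamp(alpha * x + beta, 0, 255).
        double x = 100.0;
        double y = std::min(std::max(alpha * x + beta, 0.0), 255.0);
        std::printf("alpha=%f beta=%f pixel 100 -> %f\n", alpha, beta, y);
        return 0;
    }
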
- StreamDescription Transform(const StreamDescription& inputStream); + StreamDescription Transform(const StreamDescription& inputStream) override; // Transformation of the sequence. - SequenceDataPtr Transform(SequenceDataPtr sequence); - + SequenceDataPtr Transform(SequenceDataPtr sequence) override; private: template @@ -141,4 +145,55 @@ private: StreamDescription m_outputStream; }; +// Intensity jittering based on PCA transform as described in original AlexNet paper +// (http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) +// Currently uses precomputed values from +// https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua +// but should be replaced with per-class values? +class SlimIntensityTransformer : public SlimImageTransformerBase +{ +public: + explicit SlimIntensityTransformer(const ConfigParameters& config); + + void StartEpoch(const EpochConfiguration &config) override; + void Apply(size_t id, cv::Mat &mat) override; + +private: + template + void Apply(cv::Mat &mat); + + doubleargvector m_stdDev; + double m_curStdDev; + + cv::Mat m_eigVal; + cv::Mat m_eigVec; + + conc_stack> m_rngs; +}; + +// Color jittering transform based on the paper: http://arxiv.org/abs/1312.5402 +// In short, the transform randomly changes contrast, brightness and color of the image. +class SlimColorTransformer : public SlimImageTransformerBase +{ +public: + explicit SlimColorTransformer(const ConfigParameters& config); + void StartEpoch(const EpochConfiguration &config) override; + void Apply(size_t id, cv::Mat &mat) override; + +private: + template + void Apply(cv::Mat &mat); + + doubleargvector m_brightnessRadius; + double m_curBrightnessRadius; + doubleargvector m_contrastRadius; + double m_curContrastRadius; + doubleargvector m_saturationRadius; + double m_curSaturationRadius; + + conc_stack> m_rngs; + conc_stack> m_hsvTemp; +}; + + }}} diff --git a/Source/Readers/ReaderLib/CompositeTransformer.h b/Source/Readers/ReaderLib/CompositeTransformer.h index 7072ca237..069581a31 100644 --- a/Source/Readers/ReaderLib/CompositeTransformer.h +++ b/Source/Readers/ReaderLib/CompositeTransformer.h @@ -43,6 +43,10 @@ public: virtual void StartEpoch(const EpochConfiguration &config) override { assert(m_next != nullptr); + for (auto& t : m_transformations) + { + t.m_transfromer->StartEpoch(config); + } m_next->StartEpoch(config); } diff --git a/Source/Readers/ReaderLib/Transformer.h b/Source/Readers/ReaderLib/Transformer.h index 342c1ab63..f71398d3e 100644 --- a/Source/Readers/ReaderLib/Transformer.h +++ b/Source/Readers/ReaderLib/Transformer.h @@ -61,6 +61,9 @@ public: class SlimTransformer { public: + // Starts a new epoch. + virtual void StartEpoch(const EpochConfiguration &config) = 0; + // Transformation of the stream. 
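The StartEpoch hook added to the interface above is what lets transformers such as the intensity and color jitterers vary their strength over training: the config supplies a per-epoch schedule, and StartEpoch picks the current value before any sequence of that epoch is transformed. A minimal sketch of the pattern, using a plain std::vector in place of CNTK's argvector and a hypothetical JitterSchedule class:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct EpochConfiguration { size_t m_epochIndex; }; // stand-in for the reader's epoch config

    // Hypothetical transformer state that follows the StartEpoch pattern.
    class JitterSchedule
    {
    public:
        explicit JitterSchedule(std::vector<double> radiusPerEpoch)
            : m_radius(std::move(radiusPerEpoch)), m_curRadius(0.0)
        {
            assert(!m_radius.empty());
        }

        // Called once per epoch before any sequence is transformed.
        void StartEpoch(const EpochConfiguration& config)
        {
            // In this sketch a short schedule simply keeps its last value.
            size_t i = std::min(config.m_epochIndex, m_radius.size() - 1);
            m_curRadius = m_radius[i];
        }

        double CurrentRadius() const { return m_curRadius; }

    private:
        std::vector<double> m_radius; // e.g. {0.0, 0.1, 0.2}: ramp the jitter up over epochs
        double m_curRadius;
    };
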
virtual StreamDescription Transform(const StreamDescription& inputStream) = 0; From 26e59d0d176319b6690f44aaebb07498bbdf34ab Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Tue, 3 May 2016 12:58:16 +0200 Subject: [PATCH 30/51] Putting first implementation in place --- .../CompositeDataReader.cpp | 105 +++++++++++++++--- .../CompositeDataReader/CompositeDataReader.h | 17 ++- Source/Readers/ImageReader/Exports.cpp | 57 +++++++++- .../Readers/ImageReader/ImageConfigHelper.h | 2 + .../ImageReader/ImageDataDeserializer.cpp | 59 +++++++++- .../ImageReader/ImageDataDeserializer.h | 6 +- Source/Readers/ImageReader/ImageReader.cpp | 16 +-- .../ImageReader/ImageSlimTransformers.cpp | 19 ++-- .../ImageReader/ImageSlimTransformers.h | 2 + .../Readers/ReaderLib/CompositeTransformer.h | 25 ++++- Source/Readers/ReaderLib/ConfigUtil.h | 29 +++++ .../Readers/ReaderLib/DataDeserializerBase.h | 1 + Source/Readers/ReaderLib/ReaderLib.vcxproj | 3 +- .../EndToEndTests/Image/AlexNet/AlexNet.cntk | 77 +++++++------ 14 files changed, 337 insertions(+), 81 deletions(-) create mode 100644 Source/Readers/ReaderLib/ConfigUtil.h diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp index 5bbcba163..3d1cf8f49 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp @@ -20,8 +20,9 @@ #include "FramePacker.h" #include "SequencePacker.h" #include "TruncatedBpttPacker.h" -#include "HeapMemoryProvider.h" #include "CorpusDescriptor.h" +#include "CompositeTransformer.h" +#include "ConfigUtil.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -55,37 +56,60 @@ CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryP m_packingMode = PackingMode::sequence; } - // Whether we need to check data between different deserializers. - bool cleanse = config(L"checkData", false); + m_precision = config("precision", "float"); // Creating deserializers. // TODO: Currently the primary deserializer defines the corpus. The logic will be moved to CorpusDescriptor class. CreateDeserializers(config); - // Bundling deserializers together. - // TODO: Add transformers in between. - auto bundler = std::make_shared(config, m_deserializers[0], m_deserializers, cleanse); + if (m_deserializers.empty()) + { + InvalidArgument("Could not fine deserializers in the reader config."); + } + + IDataDeserializerPtr deserializer = m_deserializers.front(); + if (m_deserializers.size() > 1) + { + // Bundling deserializers together. + // Option whether we need to check data between different deserializers. + bool cleanse = config(L"checkData", false); + deserializer = std::make_shared(config, deserializer, m_deserializers, cleanse); + } int verbosity = config(L"verbosity", 2); // Pick up the randomizer. bool randomize = config(L"randomize", false); + + // TODO: randomizer should not be a transformer. + TransformerPtr randomizer; if (randomize) { // By default randomizing the whole data set. 
size_t randomizationWindow = config(L"randomizationWindow", requestDataSize); - m_randomizer = std::make_shared(verbosity, randomizationWindow, bundler, BlockRandomizer::DecimationMode::chunk, true); + randomizer = std::make_shared(verbosity, randomizationWindow, deserializer, BlockRandomizer::DecimationMode::chunk, true); } else { - m_randomizer = std::make_shared(bundler); + randomizer = std::make_shared(deserializer); } - m_randomizer->Initialize(nullptr, config); + randomizer->Initialize(nullptr, config); + + if (!m_transforms.empty()) + { + m_transformer = std::make_shared(m_transforms); + m_transformer->Initialize(randomizer, config); + } + else + { + m_transformer = randomizer; + } // Create output stream descriptions - where to get those? from config? what if it is not the same as network expects? - // TODO: Currently only sparse streams. - for (const auto& streamDescription : bundler->GetStreamDescriptions()) + // TODO: Currently only dense output streams. + // TODO: Check here. We should already support repacking sparse into dense in the shim/matrix. + for (const auto& streamDescription : m_transformer->GetStreamDescriptions()) { StreamDescriptionPtr stream = std::make_shared(*streamDescription); stream->m_storageType = StorageType::dense; @@ -138,10 +162,61 @@ IDataDeserializerPtr CompositeDataReader::CreateDeserializer(const ConfigParamet RuntimeError("Cannot create deserializer. Please check module and type in the configuration."); } + CreateTransforms(deserializerConfig); + + assert(d != nullptr); return IDataDeserializerPtr(d); } +void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerConfig) +{ + std::string defaultModule = deserializerConfig("module"); + argvector inputs = deserializerConfig("inputs"); + for (size_t i = 0; i < inputs.size(); ++i) + { + auto inputSections = TryGetSectionsWithParameter(inputs[i], "transforms"); + if (inputSections.size() > 1) + { + LogicError("Only a single 'transforms' config is allowed per stream."); + } + + if (inputSections.empty()) + { + continue; + } + + ConfigParameters input = inputs[i](inputSections.front()); + std::wstring inputName = msra::strfun::utf16(input.ConfigName()); + + argvector transforms = input("transforms"); + for (size_t j = 0; j < transforms.size(); ++j) + { + SlimTransformerPtr transformer = CreateTransformer(transforms[j], defaultModule); + m_transforms.push_back(Transformation{transformer, inputName}); + } + } + +} + +SlimTransformerPtr CompositeDataReader::CreateTransformer(const ConfigParameters& config, const string& defaultModule) +{ + typedef bool(*TransformerFactory) (SlimTransformer** t, const std::wstring& type, const ConfigParameters& cfg); + + std::string transformerModule = config("module", defaultModule.c_str()); + TransformerFactory f = (TransformerFactory)Plugin::Load(transformerModule, "CreateTransformer"); + + std::wstring transformerType = config("type"); + SlimTransformer* t; + if (!f(&t, transformerType, config)) + { + RuntimeError("Cannot create transformer. 
Please check module and type in the configuration."); + } + + assert(t != nullptr); + return SlimTransformerPtr(t); +} + void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg) { EpochConfiguration config = cfg; @@ -151,7 +226,7 @@ void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg) RuntimeError("Unsupported minibatch size '%d'.", (int)config.m_totalEpochSizeInSamples); } - m_randomizer->StartEpoch(config); + m_transformer->StartEpoch(config); // TODO: As the next step the packers should be moved into the network. switch (m_packingMode) @@ -159,13 +234,13 @@ void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg) case PackingMode::sample: m_packer = std::make_shared( m_provider, - m_randomizer, + m_transformer, m_streams); break; case PackingMode::sequence: m_packer = std::make_shared( m_provider, - m_randomizer, + m_transformer, m_streams); break; case PackingMode::truncated: @@ -173,7 +248,7 @@ void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg) config.m_truncationSize = m_truncationLength; m_packer = std::make_shared( m_provider, - m_randomizer, + m_transformer, m_streams); break; } diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.h b/Source/Readers/CompositeDataReader/CompositeDataReader.h index 08d938019..4fe65693c 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.h +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.h @@ -9,7 +9,9 @@ #include #include #include "DataReader.h" -#include +#include "Reader.h" +#include "Transformer.h" +#include "CompositeTransformer.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -49,7 +51,6 @@ struct Minibatch; // In order not to break existing configs and allow deserializers composition it exposes the same interface as the old readers, but it is not exposed // to external developers. The actual "reader developer" now has to provide deserializer(s) only. // TODO: Implement proper corpus descriptor. -// TODO: Add transformers as the next step. // TODO: Same code as in ReaderLib shim, the one in the ReaderLib will be deleted as the next step. // TODO: Change this interface when SGD is changed. class CompositeDataReader : public Reader, protected Plugin @@ -68,7 +69,10 @@ public: private: void CreateDeserializers(const ConfigParameters& readerConfig); + void CreateTransforms(const ConfigParameters& deserializerConfig); + IDataDeserializerPtr CreateDeserializer(const ConfigParameters& readerConfig, bool primary); + SlimTransformerPtr CreateTransformer(const ConfigParameters& config, const std::string& defaultModule); enum class PackingMode @@ -103,9 +107,12 @@ private: // A list of deserializers. std::vector m_deserializers; - // Randomizer. - // TODO: remove Transformer interface from randomizer. - TransformerPtr m_randomizer; + // A list of transformers. + std::vector m_transforms; + + // First transformer. + // TODO: change to iterator. + TransformerPtr m_transformer; // TODO: Should be removed. We already have matrices on this level. // Should just get the corresponding pinned memory. 
diff --git a/Source/Readers/ImageReader/Exports.cpp b/Source/Readers/ImageReader/Exports.cpp index 2b58210f9..3b2bdb7fb 100644 --- a/Source/Readers/ImageReader/Exports.cpp +++ b/Source/Readers/ImageReader/Exports.cpp @@ -11,7 +11,9 @@ #include "ReaderShim.h" #include "ImageReader.h" #include "HeapMemoryProvider.h" -#include "CudaMemoryProvider.h" +#include "ImageDataDeserializer.h" +#include "ImageSlimTransformers.h" +#include "CorpusDescriptor.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -32,4 +34,57 @@ extern "C" DATAREADER_API void GetReaderD(IDataReader** preader) *preader = new ReaderShim(factory); } +// TODO: Not safe from the ABI perspective. Will be uglified to make the interface ABI. +extern "C" DATAREADER_API bool CreateDeserializer(IDataDeserializer** deserializer, const std::wstring& type, const ConfigParameters& deserializerConfig, CorpusDescriptorPtr corpus, bool) +{ + if (type == L"ImageDataDeserializer") + { + *deserializer = new ImageDataDeserializer(corpus, deserializerConfig); + } + else + { + // Unknown type. + return false; + } + + // Deserializer created. + return true; +} + +extern "C" DATAREADER_API bool CreateTransformer(SlimTransformer** transformer, const std::wstring& type, const ConfigParameters& config) +{ + if (type == L"Crop") + { + *transformer = new SlimCropTransformer(config); + } + else if (type == L"Scale") + { + *transformer = new SlimScaleTransformer(config); + } + else if (type == L"Color") + { + *transformer = new SlimColorTransformer(config); + } + else if (type == L"Intensity") + { + *transformer = new SlimIntensityTransformer(config); + } + else if (type == L"Mean") + { + *transformer = new SlimMeanTransformer(config); + } + else if (type == L"Transpose") + { + *transformer = new SlimTransposeTransformer(config); + } + else + { + // Unknown type. + return false; + } + + // Deserializer created. + return true; +} + }}} diff --git a/Source/Readers/ImageReader/ImageConfigHelper.h b/Source/Readers/ImageReader/ImageConfigHelper.h index 689ce8349..7bec200c7 100644 --- a/Source/Readers/ImageReader/ImageConfigHelper.h +++ b/Source/Readers/ImageReader/ImageConfigHelper.h @@ -83,5 +83,7 @@ private: CropType m_cropType; }; +std::vector GetSectionsWithParameter(const ConfigParameters& config, const std::string& parameterName); + typedef std::shared_ptr ImageConfigHelperPtr; } } } diff --git a/Source/Readers/ImageReader/ImageDataDeserializer.cpp b/Source/Readers/ImageReader/ImageDataDeserializer.cpp index 3133bfc11..61f1bd34d 100644 --- a/Source/Readers/ImageReader/ImageDataDeserializer.cpp +++ b/Source/Readers/ImageReader/ImageDataDeserializer.cpp @@ -11,6 +11,7 @@ #include #include "ImageDataDeserializer.h" #include "ImageConfigHelper.h" +#include namespace Microsoft { namespace MSR { namespace CNTK { @@ -115,13 +116,63 @@ public: } }; +// TODO: Provide only sequences specified in the corpus descriptor. +ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr, const ConfigParameters& config) +{ + ConfigParameters inputs = config("inputs"); + std::vector featureNames = GetSectionsWithParameter(inputs, "transforms"); + std::vector labelNames = GetSectionsWithParameter(inputs, "labelDim"); + + // TODO: currently support only one feature and label section. + if (featureNames.size() != 1 || labelNames.size() != 1) + { + RuntimeError( + "ImageReader currently supports a single feature and label stream. 
'%d' features , '%d' labels found.", + static_cast(featureNames.size()), + static_cast(labelNames.size())); + } + + string precision = (ConfigValue)config("precision", "float"); + + // Feature stream. + ConfigParameters featureSection = inputs(featureNames[0]); + auto features = std::make_shared(); + features->m_id = 0; + features->m_name = msra::strfun::utf16(featureSection.ConfigName()); + features->m_storageType = StorageType::dense; + features->m_elementType = AreEqualIgnoreCase(precision, "float") ? ElementType::tfloat : ElementType::tdouble; + m_streams.push_back(features); + + // Label stream. + ConfigParameters label = inputs(labelNames[0]); + size_t labelDimension = label("labelDim"); + auto labels = std::make_shared(); + labels->m_id = 1; + labels->m_name = msra::strfun::utf16(label.ConfigName()); + labels->m_sampleLayout = std::make_shared(labelDimension); + labels->m_storageType = StorageType::dense; + labels->m_elementType = AreEqualIgnoreCase(precision, "float") ? ElementType::tfloat : ElementType::tdouble; + m_streams.push_back(labels); + + m_labelGenerator = labels->m_elementType == ElementType::tfloat ? + (LabelGeneratorPtr)std::make_shared>(labelDimension) : + std::make_shared>(labelDimension); + + m_grayscale = config(L"grayscale", false); + + // TODO: multiview should be done on the level of randomizer/transformers - it is responsiblity of the + // TODO: randomizer to collect how many copies each transform needs and request same sequence several times. + bool multiViewCrop = config(L"multiViewCrop", false); + CreateSequenceDescriptions(config(L"file"), labelDimension, multiViewCrop); +} + ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config) { ImageConfigHelper configHelper(config); m_streams = configHelper.GetStreams(); assert(m_streams.size() == 2); m_grayscale = configHelper.UseGrayscale(); - const auto& label = m_streams[configHelper.GetLabelStreamId()]; + const auto& label = m_streams[configHelper.GetLabelStreamId()]; const auto& feature = m_streams[configHelper.GetFeatureStreamId()]; // Expect data in HWC. @@ -147,7 +198,7 @@ ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config) RuntimeError("Unsupported label element type '%d'.", (int)label->m_elementType); } - CreateSequenceDescriptions(configHelper.GetMapPath(), labelDimension, configHelper); + CreateSequenceDescriptions(configHelper.GetMapPath(), labelDimension, configHelper.IsMultiViewCrop()); } // Descriptions of chunks exposed by the image reader. @@ -173,7 +224,7 @@ void ImageDataDeserializer::GetSequencesForChunk(size_t chunkId, std::vector +#include "CorpusDescriptor.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -21,6 +22,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { class ImageDataDeserializer : public DataDeserializerBase { public: + ImageDataDeserializer(CorpusDescriptorPtr corpus, const ConfigParameters& config); + + // TODO: This constructor should be deprecated. Compositional config should be used instead. explicit ImageDataDeserializer(const ConfigParameters& config); // Gets sequences by specified ids. Order of returned sequences corresponds to the order of provided ids. @@ -34,7 +38,7 @@ public: private: // Creates a set of sequence descriptions. - void CreateSequenceDescriptions(std::string mapPath, size_t labelDimension, const ImageConfigHelper& config); + void CreateSequenceDescriptions(std::string mapPath, size_t labelDimension, bool isMultiCrop); // Image sequence descriptions. 
Currently, a sequence contains a single sample only. struct ImageSequenceDescription : public SequenceDescription diff --git a/Source/Readers/ImageReader/ImageReader.cpp b/Source/Readers/ImageReader/ImageReader.cpp index 47af5d373..5bcfdd2f5 100644 --- a/Source/Readers/ImageReader/ImageReader.cpp +++ b/Source/Readers/ImageReader/ImageReader.cpp @@ -54,20 +54,20 @@ ImageReader::ImageReader(MemoryProviderPtr provider, randomizer->Initialize(nullptr, config); - size_t featureStreamId = configHelper.GetFeatureStreamId(); - ConfigParameters featureStream = config(m_streams[featureStreamId]->m_name); + std::wstring featureName = m_streams[configHelper.GetFeatureStreamId()]->m_name; + ConfigParameters featureStream = config(featureName); // Create transformations. std::vector transformations; - transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); - transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); - transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); - transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); - transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); if (configHelper.GetDataFormat() == CHW) { - transformations.push_back(Transformation{ std::make_shared(featureStream), featureStreamId }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); } m_transformer = std::make_shared(transformations); diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.cpp b/Source/Readers/ImageReader/ImageSlimTransformers.cpp index 24c2c774f..8db857e07 100644 --- a/Source/Readers/ImageReader/ImageSlimTransformers.cpp +++ b/Source/Readers/ImageReader/ImageSlimTransformers.cpp @@ -303,6 +303,14 @@ SlimScaleTransformer::SlimScaleTransformer(const ConfigParameters& config) : Sli m_interp.push_back(cv::INTER_LINEAR); } +StreamDescription SlimScaleTransformer::Transform(const StreamDescription& inputStream) +{ + SlimImageTransformerBase::Transform(inputStream); + m_outputStream.m_sampleLayout = std::make_shared(ImageDimensions(m_imgWidth, m_imgHeight, m_imgChannels).AsTensorShape(HWC)); + return m_outputStream; +} + + void SlimScaleTransformer::Apply(size_t id, cv::Mat &mat) { UNUSED(id); @@ -383,18 +391,15 @@ SlimTransposeTransformer::SlimTransposeTransformer(const ConfigParameters&) StreamDescription SlimTransposeTransformer::Transform(const StreamDescription& inputStream) { m_inputStream = inputStream; - - ImageDimensions dimensions(*m_inputStream.m_sampleLayout, HWC); - - // Changing from NHWC to NCHW - m_outputStream = m_inputStream; - m_outputStream.m_sampleLayout = std::make_shared(dimensions.AsTensorShape(CHW)); - if (m_inputStream.m_storageType != StorageType::dense) { LogicError("Transpose transformer supports only dense streams."); } + // Changing from NHWC to NCHW + ImageDimensions dimensions(*m_inputStream.m_sampleLayout, HWC); + m_outputStream = m_inputStream; + m_outputStream.m_sampleLayout 
= std::make_shared(dimensions.AsTensorShape(CHW)); return m_outputStream; } diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.h b/Source/Readers/ImageReader/ImageSlimTransformers.h index 22c0c9729..9407a9b3a 100644 --- a/Source/Readers/ImageReader/ImageSlimTransformers.h +++ b/Source/Readers/ImageReader/ImageSlimTransformers.h @@ -99,6 +99,8 @@ class SlimScaleTransformer : public SlimImageTransformerBase public: explicit SlimScaleTransformer(const ConfigParameters& config); + StreamDescription Transform(const StreamDescription& inputStream) override; + private: virtual void Apply(size_t id, cv::Mat &mat) override; diff --git a/Source/Readers/ReaderLib/CompositeTransformer.h b/Source/Readers/ReaderLib/CompositeTransformer.h index 069581a31..7bf65bec1 100644 --- a/Source/Readers/ReaderLib/CompositeTransformer.h +++ b/Source/Readers/ReaderLib/CompositeTransformer.h @@ -14,14 +14,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { struct Transformation { SlimTransformerPtr m_transfromer; - size_t m_streamId; + std::wstring m_streamName; }; class CompositeTransformer : public Transformer { public: - CompositeTransformer(const std::vector& transformations) : m_transformations(transformations) + CompositeTransformer(const std::vector& transformations) { + for (const auto& t: transformations) + { + m_transformations.push_back(std::make_pair(t, 0ul)); + } } // Initializes the transformer. @@ -34,7 +38,16 @@ public: m_chainOfStreamDescriptions.push_back(streams); for (auto& t : m_transformations) { - streams[t.m_streamId] = std::make_shared(t.m_transfromer->Transform(*streams[t.m_streamId])); + // filling in stream id for the transform + for (const auto& s: streams) + { + if (s->m_name == t.first.m_streamName) + { + t.second = s->m_id; + } + } + + streams[t.second] = std::make_shared(t.first.m_transfromer->Transform(*streams[t.second])); m_chainOfStreamDescriptions.push_back(streams); } } @@ -45,7 +58,7 @@ public: assert(m_next != nullptr); for (auto& t : m_transformations) { - t.m_transfromer->StartEpoch(config); + t.first.m_transfromer->StartEpoch(config); } m_next->StartEpoch(config); } @@ -72,7 +85,7 @@ public: { for (auto& t : m_transformations) { - sequences.m_data[t.m_streamId][j] = t.m_transfromer->Transform(sequences.m_data[t.m_streamId][j]); + sequences.m_data[t.second][j] = t.first.m_transfromer->Transform(sequences.m_data[t.second][j]); } } @@ -81,7 +94,7 @@ public: private: TransformerPtr m_next; - std::vector m_transformations; + std::vector> m_transformations; std::vector> m_chainOfStreamDescriptions; }; diff --git a/Source/Readers/ReaderLib/ConfigUtil.h b/Source/Readers/ReaderLib/ConfigUtil.h new file mode 100644 index 000000000..8252186fe --- /dev/null +++ b/Source/Readers/ReaderLib/ConfigUtil.h @@ -0,0 +1,29 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. +// + +#pragma once + +#include +#include +#include "Config.h" + +namespace Microsoft { namespace MSR { namespace CNTK { + +// Helper function to get sections that contains specified parameter. 
+inline std::vector TryGetSectionsWithParameter(const ConfigParameters& config, const std::string& parameterName) +{ + std::vector sectionNames; + for (const std::pair& section : config) + { + if (section.second.ExistsCurrent(parameterName)) + { + sectionNames.push_back(section.first); + } + } + + return sectionNames; +} + +}}} diff --git a/Source/Readers/ReaderLib/DataDeserializerBase.h b/Source/Readers/ReaderLib/DataDeserializerBase.h index ebcd6c39d..908eb6f0c 100644 --- a/Source/Readers/ReaderLib/DataDeserializerBase.h +++ b/Source/Readers/ReaderLib/DataDeserializerBase.h @@ -6,6 +6,7 @@ #pragma once #include "DataDeserializer.h" +#include namespace Microsoft { namespace MSR { namespace CNTK { diff --git a/Source/Readers/ReaderLib/ReaderLib.vcxproj b/Source/Readers/ReaderLib/ReaderLib.vcxproj index 5dc2ceba8..3c4701941 100644 --- a/Source/Readers/ReaderLib/ReaderLib.vcxproj +++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj @@ -40,6 +40,7 @@ + @@ -79,4 +80,4 @@ - + \ No newline at end of file diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk index 907a1d51a..94ef838af 100644 --- a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk @@ -41,39 +41,50 @@ Train=[ numMBsToShowResult=100 ] - - reader=[ - readerType=ImageReader - # Map file which maps images to labels using the following format: - # - # Example: - # C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG0 - file=$ConfigDir$/train_map.txt - # Randomize images before every epoch. Possible values: None, Auto. Default: Auto. - randomize=Auto - features=[ - # Below are the required parameters. - width=224 - height=224 - channels=3 - # Below are the optional parameters. - # Possible values: Center, Random. Default: Center - cropType=Random - # Horizontal random flip, will be enabled by default if cropType=Random - #hflip=0 - # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1. - cropRatio=0.875 - # Crop scale ratio jitter type. - # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio - jitterType=UniRatio - # Interpolation to use when scaling image to width x height size. - # Possible values: nearest, linear, cubic, lanczos. Default: linear. - interpolations=Linear - # Stores mean values for each pixel in OpenCV matrix XML format. - meanFile=$ConfigDir$/ImageNet1K_mean.xml - ] - labels=[ - labelDim=1000 + + reader = [ + verbosity = 0 + randomize = true + + # A list of deserializers to use. 
+ deserializers = [ + [ + type = "ImageDataDeserializer" + module = "ImageReader" + + # Map file which maps images to labels using the following format: + # + # Example: + # C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG0 + file=$ConfigDir$/train_map.txt + + # Description of input streams + inputs = [ + features=[ + transforms=[ + [ + type="Crop" + cropType=Random + ratio=0.875 + jitterType=UniRatio + ]:[ + type="Scale" + width=224 + height=224 + channels=3 + ]:[ + type="Mean" + file=$ConfigDir$/ImageNet1K_mean.xml + ]:[ + type="Transpose" + ] + ] + ] + labels=[ + labelDim=1000 + ] + ] + ] ] ] ] From 7389f78968ce9e7640a13bacac2c68b1abb0ca34 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Tue, 3 May 2016 15:35:02 +0200 Subject: [PATCH 31/51] Fixing some bugs --- .../CompositeDataReader.cpp | 18 +++++++++++++++++- .../ImageReader/ImageDataDeserializer.cpp | 2 +- Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk | 6 ++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp index 3d1cf8f49..270d711ba 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp @@ -23,6 +23,7 @@ #include "CorpusDescriptor.h" #include "CompositeTransformer.h" #include "ConfigUtil.h" +#include namespace Microsoft { namespace MSR { namespace CNTK { @@ -30,6 +31,12 @@ CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryP m_corpus(std::make_shared()), m_provider(provider) { + int threadCount = config(L"numCPUThreads", 0); + if (threadCount > 0) + { + omp_set_num_threads(threadCount); + } + // Identifying packing mode. bool frameMode = config(L"frameMode", true); bool truncated = config(L"truncated", false); @@ -87,7 +94,16 @@ CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryP { // By default randomizing the whole data set. size_t randomizationWindow = config(L"randomizationWindow", requestDataSize); - randomizer = std::make_shared(verbosity, randomizationWindow, deserializer, BlockRandomizer::DecimationMode::chunk, true); + bool useLegacyRandomization = config(L"useLegacy", true); + bool multithreadedGetNextSequences = false; + BlockRandomizer::DecimationMode decimationMode = BlockRandomizer::DecimationMode::chunk; + if (!useLegacyRandomization) + { + decimationMode = BlockRandomizer::DecimationMode::sequence; + multithreadedGetNextSequences = false; + randomizationWindow = 1; + } + randomizer = std::make_shared(verbosity, randomizationWindow, deserializer, decimationMode, useLegacyRandomization, multithreadedGetNextSequences); } else { diff --git a/Source/Readers/ImageReader/ImageDataDeserializer.cpp b/Source/Readers/ImageReader/ImageDataDeserializer.cpp index 61f1bd34d..88141bdc4 100644 --- a/Source/Readers/ImageReader/ImageDataDeserializer.cpp +++ b/Source/Readers/ImageReader/ImageDataDeserializer.cpp @@ -150,7 +150,7 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr, const ConfigPa labels->m_id = 1; labels->m_name = msra::strfun::utf16(label.ConfigName()); labels->m_sampleLayout = std::make_shared(labelDimension); - labels->m_storageType = StorageType::dense; + labels->m_storageType = StorageType::sparse_csc; labels->m_elementType = AreEqualIgnoreCase(precision, "float") ? 
ElementType::tfloat : ElementType::tdouble; m_streams.push_back(labels); diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk index 94ef838af..d1d1436ac 100644 --- a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk @@ -45,6 +45,7 @@ Train=[ reader = [ verbosity = 0 randomize = true + useLegacy = false # A list of deserializers to use. deserializers = [ @@ -65,16 +66,17 @@ Train=[ [ type="Crop" cropType=Random - ratio=0.875 + cropRatio=0.875 jitterType=UniRatio ]:[ type="Scale" width=224 height=224 channels=3 + interpolations=Linear ]:[ type="Mean" - file=$ConfigDir$/ImageNet1K_mean.xml + meanFile=$ConfigDir$/ImageNet1K_mean.xml ]:[ type="Transpose" ] From 45d8c87eaba58b931bdaf974d53d4f37e19e59c6 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Tue, 3 May 2016 17:29:06 +0200 Subject: [PATCH 32/51] Adapting config --- .../CompositeDataReader.cpp | 6 --- .../EndToEndTests/Image/AlexNet/AlexNet.cntk | 53 ++++++++++++++----- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp index 270d711ba..b4663f459 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp @@ -97,12 +97,6 @@ CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryP bool useLegacyRandomization = config(L"useLegacy", true); bool multithreadedGetNextSequences = false; BlockRandomizer::DecimationMode decimationMode = BlockRandomizer::DecimationMode::chunk; - if (!useLegacyRandomization) - { - decimationMode = BlockRandomizer::DecimationMode::sequence; - multithreadedGetNextSequences = false; - randomizationWindow = 1; - } randomizer = std::make_shared(verbosity, randomizationWindow, deserializer, decimationMode, useLegacyRandomization, multithreadedGetNextSequences); } else diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk index d1d1436ac..b1ecfa6ee 100644 --- a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk @@ -45,6 +45,10 @@ Train=[ reader = [ verbosity = 0 randomize = true + + # Currently,f for image reader a single sequence is a chunk + # so setting randomization window to 1. + randomizationWindow =1 useLegacy = false # A list of deserializers to use. @@ -109,18 +113,41 @@ Test=[ ] reader=[ - readerType=ImageReader - file=$ConfigDir$/val_map.txt - randomize=None - features=[ - width=224 - height=224 - channels=3 - cropType=Center - meanFile=$ConfigDir$/ImageNet1K_mean.xml + verbosity = 0 + randomize = false + + # A list of deserializers to use. 
+ deserializers = [ + [ + type = "ImageDataDeserializer" + module = "ImageReader" + file=$ConfigDir$/val_map.txt + + # Description of input streams + inputs = [ + features=[ + transforms=[ + [ + type="Crop" + cropType=Center + ]:[ + type="Scale" + width=224 + height=224 + channels=3 + ]:[ + type="Mean" + meanFile=$ConfigDir$/ImageNet1K_mean.xml + ]:[ + type="Transpose" + ] + ] + ] + labels=[ + labelDim=1000 + ] + ] + ] ] - labels=[ - labelDim=1000 - ] - ] + ] ] From 260e53dbf2b25c1e719bb4b280e972d478bccc5c Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Tue, 3 May 2016 18:20:53 +0200 Subject: [PATCH 33/51] Fixing filters --- Source/Readers/ReaderLib/ReaderLib.vcxproj.filters | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters b/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters index 97336400b..c880ccf0b 100644 --- a/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters +++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters @@ -66,6 +66,7 @@ Utils + Transformers From 1a40a3c8d94c1021d8752d896e86b367e469671f Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Tue, 3 May 2016 19:14:58 +0200 Subject: [PATCH 34/51] Some refactoring --- .../CNTKTextFormatReader.cpp | 13 +- .../CNTKTextFormatReader.h | 3 +- .../CompositeDataReader.cpp | 41 +- .../CompositeDataReader/CompositeDataReader.h | 7 +- .../ExperimentalHTKMLFReader/HTKMLFReader.cpp | 2 - .../ExperimentalHTKMLFReader/HTKMLFReader.h | 4 +- Source/Readers/ImageReader/Exports.cpp | 16 +- Source/Readers/ImageReader/ImageReader.cpp | 25 +- Source/Readers/ImageReader/ImageReader.h | 4 +- .../Readers/ImageReader/ImageReader.vcxproj | 2 - .../ImageReader/ImageReader.vcxproj.filters | 18 +- .../ImageReader/ImageSlimTransformers.cpp | 647 ------------------ .../ImageReader/ImageSlimTransformers.h | 201 ------ .../Readers/ImageReader/ImageTransformers.cpp | 330 ++++----- .../Readers/ImageReader/ImageTransformers.h | 118 ++-- Source/Readers/ReaderLib/BlockRandomizer.h | 6 +- .../Readers/ReaderLib/CompositeTransformer.h | 57 +- Source/Readers/ReaderLib/FramePacker.h | 8 +- Source/Readers/ReaderLib/NoRandomizer.cpp | 4 - Source/Readers/ReaderLib/NoRandomizer.h | 7 +- Source/Readers/ReaderLib/PackerBase.cpp | 6 +- Source/Readers/ReaderLib/PackerBase.h | 7 +- Source/Readers/ReaderLib/ReaderLib.vcxproj | 1 + .../ReaderLib/ReaderLib.vcxproj.filters | 12 +- Source/Readers/ReaderLib/SequenceEnumerator.h | 55 ++ Source/Readers/ReaderLib/SequencePacker.cpp | 2 +- Source/Readers/ReaderLib/SequencePacker.h | 4 +- Source/Readers/ReaderLib/Transformer.h | 64 +- Source/Readers/ReaderLib/TransformerBase.h | 1 + .../Readers/ReaderLib/TruncatedBpttPacker.cpp | 6 +- .../Readers/ReaderLib/TruncatedBpttPacker.h | 3 +- 31 files changed, 352 insertions(+), 1322 deletions(-) delete mode 100644 Source/Readers/ImageReader/ImageSlimTransformers.cpp delete mode 100644 Source/Readers/ImageReader/ImageSlimTransformers.h create mode 100644 Source/Readers/ReaderLib/SequenceEnumerator.h diff --git a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp index 52d33d07b..2fc53233f 100644 --- a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp +++ b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp @@ -31,27 +31,22 @@ CNTKTextFormatReader::CNTKTextFormatReader(MemoryProviderPtr provider, m_deserializer = shared_ptr(new TextParser(configHelper)); } - TransformerPtr randomizer; size_t window = configHelper.GetRandomizationWindow(); if (window > 
0) { // Verbosity is a general config parameter, not specific to the text format reader. int verbosity = config(L"verbosity", 2); - randomizer = make_shared(verbosity, window, m_deserializer); + m_sequenceEnumerator = make_shared(verbosity, window, m_deserializer); } else { - randomizer = std::make_shared(m_deserializer); + m_sequenceEnumerator = std::make_shared(m_deserializer); } - randomizer->Initialize(nullptr, config); - - m_transformer = randomizer; - // TODO: add "frameMode" config paramter m_packer = std::make_shared( m_provider, - m_transformer, + m_sequenceEnumerator, GetStreamDescriptions()); } catch (const std::runtime_error& e) @@ -72,7 +67,7 @@ void CNTKTextFormatReader::StartEpoch(const EpochConfiguration& config) RuntimeError("Epoch size cannot be 0."); } - m_transformer->StartEpoch(config); + m_sequenceEnumerator->StartEpoch(config); m_packer->StartEpoch(config); } diff --git a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.h b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.h index affa87a5f..f9589e96c 100644 --- a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.h +++ b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.h @@ -8,6 +8,7 @@ #include "TextParser.h" #include "Reader.h" #include "Packer.h" +#include "SequenceEnumerator.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -33,7 +34,7 @@ private: IDataDeserializerPtr m_deserializer; // A head transformer in a list of transformers. - TransformerPtr m_transformer; + SequenceEnumeratorPtr m_sequenceEnumerator; // Packer. PackerPtr m_packer; diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp index b4663f459..33f2b50ff 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp @@ -87,9 +87,6 @@ CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryP // Pick up the randomizer. bool randomize = config(L"randomize", false); - - // TODO: randomizer should not be a transformer. - TransformerPtr randomizer; if (randomize) { // By default randomizing the whole data set. @@ -97,29 +94,21 @@ CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryP bool useLegacyRandomization = config(L"useLegacy", true); bool multithreadedGetNextSequences = false; BlockRandomizer::DecimationMode decimationMode = BlockRandomizer::DecimationMode::chunk; - randomizer = std::make_shared(verbosity, randomizationWindow, deserializer, decimationMode, useLegacyRandomization, multithreadedGetNextSequences); + m_sequenceEnumerator = std::make_shared(verbosity, randomizationWindow, deserializer, decimationMode, useLegacyRandomization, multithreadedGetNextSequences); } else { - randomizer = std::make_shared(deserializer); + m_sequenceEnumerator = std::make_shared(deserializer); } - randomizer->Initialize(nullptr, config); - - if (!m_transforms.empty()) - { - m_transformer = std::make_shared(m_transforms); - m_transformer->Initialize(randomizer, config); - } - else - { - m_transformer = randomizer; - } + m_sequenceEnumerator = m_transforms.empty() + ? m_sequenceEnumerator + : std::make_shared(m_transforms, m_sequenceEnumerator); // Create output stream descriptions - where to get those? from config? what if it is not the same as network expects? // TODO: Currently only dense output streams. // TODO: Check here. We should already support repacking sparse into dense in the shim/matrix. 
- for (const auto& streamDescription : m_transformer->GetStreamDescriptions()) + for (const auto& streamDescription : m_sequenceEnumerator->GetStreamDescriptions()) { StreamDescriptionPtr stream = std::make_shared(*streamDescription); stream->m_storageType = StorageType::dense; @@ -202,29 +191,29 @@ void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerC argvector transforms = input("transforms"); for (size_t j = 0; j < transforms.size(); ++j) { - SlimTransformerPtr transformer = CreateTransformer(transforms[j], defaultModule); + TransformerPtr transformer = CreateTransformer(transforms[j], defaultModule); m_transforms.push_back(Transformation{transformer, inputName}); } } } -SlimTransformerPtr CompositeDataReader::CreateTransformer(const ConfigParameters& config, const string& defaultModule) +TransformerPtr CompositeDataReader::CreateTransformer(const ConfigParameters& config, const string& defaultModule) { - typedef bool(*TransformerFactory) (SlimTransformer** t, const std::wstring& type, const ConfigParameters& cfg); + typedef bool(*TransformerFactory) (Transformer** t, const std::wstring& type, const ConfigParameters& cfg); std::string transformerModule = config("module", defaultModule.c_str()); TransformerFactory f = (TransformerFactory)Plugin::Load(transformerModule, "CreateTransformer"); std::wstring transformerType = config("type"); - SlimTransformer* t; + Transformer* t; if (!f(&t, transformerType, config)) { RuntimeError("Cannot create transformer. Please check module and type in the configuration."); } assert(t != nullptr); - return SlimTransformerPtr(t); + return TransformerPtr(t); } void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg) @@ -236,7 +225,7 @@ void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg) RuntimeError("Unsupported minibatch size '%d'.", (int)config.m_totalEpochSizeInSamples); } - m_transformer->StartEpoch(config); + m_sequenceEnumerator->StartEpoch(config); // TODO: As the next step the packers should be moved into the network. switch (m_packingMode) @@ -244,13 +233,13 @@ void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg) case PackingMode::sample: m_packer = std::make_shared( m_provider, - m_transformer, + m_sequenceEnumerator, m_streams); break; case PackingMode::sequence: m_packer = std::make_shared( m_provider, - m_transformer, + m_sequenceEnumerator, m_streams); break; case PackingMode::truncated: @@ -258,7 +247,7 @@ void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg) config.m_truncationSize = m_truncationLength; m_packer = std::make_shared( m_provider, - m_transformer, + m_sequenceEnumerator, m_streams); break; } diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.h b/Source/Readers/CompositeDataReader/CompositeDataReader.h index 4fe65693c..c170f81a2 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.h +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.h @@ -72,7 +72,7 @@ private: void CreateTransforms(const ConfigParameters& deserializerConfig); IDataDeserializerPtr CreateDeserializer(const ConfigParameters& readerConfig, bool primary); - SlimTransformerPtr CreateTransformer(const ConfigParameters& config, const std::string& defaultModule); + TransformerPtr CreateTransformer(const ConfigParameters& config, const std::string& defaultModule); enum class PackingMode @@ -110,9 +110,8 @@ private: // A list of transformers. std::vector m_transforms; - // First transformer. - // TODO: change to iterator. 
- TransformerPtr m_transformer; + // Sequence provider. + SequenceEnumeratorPtr m_sequenceEnumerator; // TODO: Should be removed. We already have matrices on this level. // Should just get the corresponding pinned memory. diff --git a/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp b/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp index 40e9b5f9f..9d35a9f55 100644 --- a/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp +++ b/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp @@ -124,8 +124,6 @@ HTKMLFReader::HTKMLFReader(MemoryProviderPtr provider, RuntimeError("readMethod must be 'blockRandomize' or 'none'."); } - m_randomizer->Initialize(nullptr, readerConfig); - // Create output stream descriptions (all dense) for (auto d : deserializers) { diff --git a/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.h b/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.h index e230da21f..094ffa03b 100644 --- a/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.h +++ b/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.h @@ -8,6 +8,7 @@ #include "Reader.h" #include "Packer.h" #include "Config.h" +#include "SequenceEnumerator.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -54,8 +55,7 @@ private: // Memory provider (TODO: this will possibly change in the near future.) MemoryProviderPtr m_provider; - // TODO: Randomizer won't implement transformer interface in the near future. - TransformerPtr m_randomizer; + SequenceEnumeratorPtr m_randomizer; // Truncation length for BPTT mode. size_t m_truncationLength; diff --git a/Source/Readers/ImageReader/Exports.cpp b/Source/Readers/ImageReader/Exports.cpp index 3b2bdb7fb..43cf72a65 100644 --- a/Source/Readers/ImageReader/Exports.cpp +++ b/Source/Readers/ImageReader/Exports.cpp @@ -12,7 +12,7 @@ #include "ImageReader.h" #include "HeapMemoryProvider.h" #include "ImageDataDeserializer.h" -#include "ImageSlimTransformers.h" +#include "ImageTransformers.h" #include "CorpusDescriptor.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -51,31 +51,31 @@ extern "C" DATAREADER_API bool CreateDeserializer(IDataDeserializer** deserializ return true; } -extern "C" DATAREADER_API bool CreateTransformer(SlimTransformer** transformer, const std::wstring& type, const ConfigParameters& config) +extern "C" DATAREADER_API bool CreateTransformer(Transformer** transformer, const std::wstring& type, const ConfigParameters& config) { if (type == L"Crop") { - *transformer = new SlimCropTransformer(config); + *transformer = new CropTransformer(config); } else if (type == L"Scale") { - *transformer = new SlimScaleTransformer(config); + *transformer = new ScaleTransformer(config); } else if (type == L"Color") { - *transformer = new SlimColorTransformer(config); + *transformer = new ColorTransformer(config); } else if (type == L"Intensity") { - *transformer = new SlimIntensityTransformer(config); + *transformer = new IntensityTransformer(config); } else if (type == L"Mean") { - *transformer = new SlimMeanTransformer(config); + *transformer = new MeanTransformer(config); } else if (type == L"Transpose") { - *transformer = new SlimTransposeTransformer(config); + *transformer = new TransposeTransformer(config); } else { diff --git a/Source/Readers/ImageReader/ImageReader.cpp b/Source/Readers/ImageReader/ImageReader.cpp index 5bcfdd2f5..43e9e0bde 100644 --- a/Source/Readers/ImageReader/ImageReader.cpp +++ b/Source/Readers/ImageReader/ImageReader.cpp @@ -13,7 +13,7 @@ #include "FramePacker.h" #include "CompositeTransformer.h" 
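The renamed factory in Exports.cpp pairs with CompositeDataReader::CreateTransformer shown earlier. A minimal usage sketch, assuming only the signatures visible in this patch; the L"Crop" literal and the local names raw/factory are illustrative, and transformerModule/config are the values read from the reader configuration as in the hunk above:

    // Resolve the module's exported factory and ask it for a transformer of the configured type.
    typedef bool (*TransformerFactory)(Transformer** t, const std::wstring& type, const ConfigParameters& cfg);
    TransformerFactory factory = (TransformerFactory)Plugin::Load(transformerModule, "CreateTransformer");

    Transformer* raw = nullptr;
    if (!factory(&raw, L"Crop", config)) // "Crop" maps to CropTransformer in ImageReader's Exports.cpp
        RuntimeError("Cannot create transformer. Please check module and type in the configuration.");
    TransformerPtr transformer(raw);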
#include -#include "ImageSlimTransformers.h" +#include "ImageTransformers.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -38,7 +38,7 @@ ImageReader::ImageReader(MemoryProviderPtr provider, auto deserializer = std::make_shared(config); - TransformerPtr randomizer; + SequenceEnumeratorPtr randomizer; // Request multi-threaded randomizer operation to speed up CPU-intensive image-decoding and transformations. const bool multithreadedGetNextSequences = true; if (configHelper.ShouldRandomize()) @@ -52,30 +52,27 @@ ImageReader::ImageReader(MemoryProviderPtr provider, randomizer = std::make_shared(deserializer, multithreadedGetNextSequences); } - randomizer->Initialize(nullptr, config); - std::wstring featureName = m_streams[configHelper.GetFeatureStreamId()]->m_name; ConfigParameters featureStream = config(featureName); // Create transformations. std::vector transformations; - transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); - transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); - transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); - transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); - transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); if (configHelper.GetDataFormat() == CHW) { - transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); + transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); } - m_transformer = std::make_shared(transformations); - m_transformer->Initialize(randomizer, config); + m_sequenceEnumerator = std::make_shared(transformations, randomizer); m_packer = std::make_shared( m_provider, - m_transformer, + m_sequenceEnumerator, m_streams); } @@ -92,7 +89,7 @@ void ImageReader::StartEpoch(const EpochConfiguration& config) RuntimeError("Epoch size cannot be 0."); } - m_transformer->StartEpoch(config); + m_sequenceEnumerator->StartEpoch(config); m_packer->StartEpoch(config); } diff --git a/Source/Readers/ImageReader/ImageReader.h b/Source/Readers/ImageReader/ImageReader.h index 65038ecee..7197ad27f 100644 --- a/Source/Readers/ImageReader/ImageReader.h +++ b/Source/Readers/ImageReader/ImageReader.h @@ -6,8 +6,8 @@ #pragma once #include "Reader.h" -#include "ImageTransformers.h" #include "Packer.h" +#include "SequenceEnumerator.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -34,7 +34,7 @@ private: std::vector m_streams; // A head transformer in a list of transformers. - TransformerPtr m_transformer; + SequenceEnumeratorPtr m_sequenceEnumerator; // Packer. 
PackerPtr m_packer; diff --git a/Source/Readers/ImageReader/ImageReader.vcxproj b/Source/Readers/ImageReader/ImageReader.vcxproj index 7bc1da3ae..fbc5ca6fe 100644 --- a/Source/Readers/ImageReader/ImageReader.vcxproj +++ b/Source/Readers/ImageReader/ImageReader.vcxproj @@ -121,7 +121,6 @@ if "$(UseZip)" == "true" if exist "$(ZLIB_PATH)\bin\zlib1.dll" (xcopy /I /D /Y " - @@ -134,7 +133,6 @@ if "$(UseZip)" == "true" if exist "$(ZLIB_PATH)\bin\zlib1.dll" (xcopy /I /D /Y " true - Create diff --git a/Source/Readers/ImageReader/ImageReader.vcxproj.filters b/Source/Readers/ImageReader/ImageReader.vcxproj.filters index d2b93926b..69ab43763 100644 --- a/Source/Readers/ImageReader/ImageReader.vcxproj.filters +++ b/Source/Readers/ImageReader/ImageReader.vcxproj.filters @@ -3,25 +3,12 @@ - - Common - - - Common - - - - Common - - - Common - - + @@ -35,12 +22,11 @@ Common\Include - - + diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.cpp b/Source/Readers/ImageReader/ImageSlimTransformers.cpp deleted file mode 100644 index 8db857e07..000000000 --- a/Source/Readers/ImageReader/ImageSlimTransformers.cpp +++ /dev/null @@ -1,647 +0,0 @@ -// -// Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. -// - -#include "stdafx.h" -#include -#include -#include -#include "ImageSlimTransformers.h" -#include "Config.h" -#include "ConcStack.h" -#include "ImageConfigHelper.h" -#include "StringUtil.h" -#include "ElementTypeUtils.h" - -namespace Microsoft { -namespace MSR { -namespace CNTK { - -struct ImageSequenceData : DenseSequenceData -{ - cv::Mat m_image; - // In case we do not copy data - we have to preserve the original sequence. - SequenceDataPtr m_original; -}; - -SlimImageTransformerBase::SlimImageTransformerBase(const ConfigParameters& cfg) : m_imageElementType(0) -{ - m_seed = cfg(L"seed", 0u); -} - -StreamDescription SlimImageTransformerBase::Transform(const StreamDescription& inputStream) -{ - m_inputStream = inputStream; - m_outputStream = m_inputStream; - - if (m_inputStream.m_storageType != StorageType::dense) - { - LogicError("ImageTransformerBase supports only dense input streams."); - } - - if (m_inputStream.m_elementType == ElementType::tdouble) - { - m_imageElementType = CV_64F; - } - else if (m_inputStream.m_elementType == ElementType::tfloat) - { - m_imageElementType = CV_32F; - } - else - { - RuntimeError("Unsupported type"); - } - - return m_outputStream; -} - -SequenceDataPtr SlimImageTransformerBase::Transform(SequenceDataPtr sequence) -{ - auto inputSequence = static_cast(*sequence); - - ImageDimensions dimensions(*inputSequence.m_sampleLayout, HWC); - int columns = static_cast(dimensions.m_width); - int rows = static_cast(dimensions.m_height); - int channels = static_cast(dimensions.m_numChannels); - - auto result = std::make_shared(); - int type = CV_MAKETYPE(m_imageElementType, channels); - cv::Mat buffer = cv::Mat(rows, columns, type, inputSequence.m_data); - Apply(sequence->m_id, buffer); - if (!buffer.isContinuous()) - { - buffer = buffer.clone(); - } - else - { - result->m_original = sequence; - } - assert(buffer.isContinuous()); - result->m_image = buffer; - result->m_data = buffer.ptr(); - result->m_numberOfSamples = inputSequence.m_numberOfSamples; - - ImageDimensions outputDimensions(buffer.cols, buffer.rows, buffer.channels()); - result->m_sampleLayout = std::make_shared(outputDimensions.AsTensorShape(HWC)); - return result; -} - 
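The base-class Transform deleted here (and its surviving copy in ImageTransformers.cpp later in this patch) wraps the dense sample buffer in an OpenCV Mat header rather than copying the pixels, and clones only when a transform leaves the data non-contiguous. A minimal sketch of that idea; the function and parameter names are placeholders, not part of the patch:

    // Wrap an existing float HWC buffer in a cv::Mat header (no pixel copy), transform in
    // place, and clone only if the result is no longer contiguous (e.g. after cropping).
    cv::Mat WrapAndFixup(float* rawBuffer, int height, int width, int channels)
    {
        cv::Mat view(height, width, CV_MAKETYPE(CV_32F, channels), rawBuffer);
        // ... crop/scale/mean would be applied here, as in the transformers above ...
        if (!view.isContinuous())
            view = view.clone(); // packers expect a contiguous sample buffer
        return view;
    }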
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -SlimCropTransformer::SlimCropTransformer(const ConfigParameters& config) : SlimImageTransformerBase(config) -{ - m_cropType = ParseCropType(config(L"cropType", "")); - - floatargvector cropRatio = config(L"cropRatio", "1.0"); - m_cropRatioMin = cropRatio[0]; - m_cropRatioMax = cropRatio[1]; - - if (!(0 < m_cropRatioMin && m_cropRatioMin <= 1.0) || - !(0 < m_cropRatioMax && m_cropRatioMax <= 1.0) || - m_cropRatioMin > m_cropRatioMax) - { - RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must " - "<= cropMax"); - } - - m_jitterType = ParseJitterType(config(L"jitterType", "")); - - if (!config.ExistsCurrent(L"hflip")) - { - m_hFlip = m_cropType == CropType::Random; - } - else - { - m_hFlip = config(L"hflip"); - } -} - -void SlimCropTransformer::Apply(size_t id, cv::Mat &mat) -{ - auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create( - [seed]() - { - return std::make_unique(seed); - }); - - double ratio = 1; - switch (m_jitterType) - { - case RatioJitterType::None: - ratio = m_cropRatioMin; - break; - case RatioJitterType::UniRatio: - if (m_cropRatioMin == m_cropRatioMax) - { - ratio = m_cropRatioMin; - } - else - { - ratio = UniRealT(m_cropRatioMin, m_cropRatioMax)(*rng); - assert(m_cropRatioMin <= ratio && ratio < m_cropRatioMax); - } - break; - default: - RuntimeError("Jitter type currently not implemented."); - } - - int viewIndex = m_cropType == CropType::MultiView10 ? (int)(id % 10) : 0; - - mat = mat(GetCropRect(m_cropType, viewIndex, mat.rows, mat.cols, ratio, *rng)); - if ((m_hFlip && std::bernoulli_distribution()(*rng)) || - viewIndex >= 5) - { - cv::flip(mat, mat, 1); - } - - m_rngs.push(std::move(rng)); -} - -SlimCropTransformer::CropType SlimCropTransformer::ParseCropType(const std::string &src) -{ - if (src.empty() || AreEqualIgnoreCase(src, "center")) - { - return CropType::Center; - } - - if (AreEqualIgnoreCase(src, "random")) - { - return CropType::Random; - } - - if (AreEqualIgnoreCase(src, "multiview10")) - { - return CropType::MultiView10; - } - - RuntimeError("Invalid crop type: %s.", src.c_str()); -} - -SlimCropTransformer::RatioJitterType SlimCropTransformer::ParseJitterType(const std::string &src) -{ - if (src.empty() || AreEqualIgnoreCase(src, "none")) - { - return RatioJitterType::None; - } - - if (AreEqualIgnoreCase(src, "uniratio")) - { - return RatioJitterType::UniRatio; - } - - if (AreEqualIgnoreCase(src, "unilength")) - { - return RatioJitterType::UniLength; - } - - if (AreEqualIgnoreCase(src, "uniarea")) - { - return RatioJitterType::UniArea; - } - - RuntimeError("Invalid jitter type: %s.", src.c_str()); -} - -cv::Rect SlimCropTransformer::GetCropRect(CropType type, int viewIndex, int crow, int ccol, - double cropRatio, std::mt19937 &rng) -{ - assert(crow > 0); - assert(ccol > 0); - assert(0 < cropRatio && cropRatio <= 1.0); - - int cropSize = static_cast(std::min(crow, ccol) * cropRatio); - int xOff = -1; - int yOff = -1; - switch (type) - { - case CropType::Center: - assert(viewIndex == 0); - xOff = (ccol - cropSize) / 2; - yOff = (crow - cropSize) / 2; - break; - case CropType::Random: - assert(viewIndex == 0); - xOff = UniIntT(0, ccol - cropSize)(rng); - yOff = UniIntT(0, crow - cropSize)(rng); - break; - case CropType::MultiView10: - { - assert(0 <= viewIndex && viewIndex < 10); - // 0 - 4: 4 corners + center crop. 5 - 9: same, but with a flip. 
- int isubView = viewIndex % 5; - switch (isubView) - { - // top-left - case 0: - xOff = 0; - yOff = 0; - break; - // top-right - case 1: - xOff = ccol - cropSize; - yOff = 0; - break; - // bottom-left - case 2: - xOff = 0; - yOff = crow - cropSize; - break; - // bottom-right - case 3: - xOff = ccol - cropSize; - yOff = crow - cropSize; - break; - // center - case 4: - xOff = (ccol - cropSize) / 2; - yOff = (crow - cropSize) / 2; - break; - } - break; - } - default: - assert(false); - } - - assert(0 <= xOff && xOff <= ccol - cropSize); - assert(0 <= yOff && yOff <= crow - cropSize); - return cv::Rect(xOff, yOff, cropSize, cropSize); -} - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -SlimScaleTransformer::SlimScaleTransformer(const ConfigParameters& config) : SlimImageTransformerBase(config) -{ - m_interpMap.emplace("nearest", cv::INTER_NEAREST); - m_interpMap.emplace("linear", cv::INTER_LINEAR); - m_interpMap.emplace("cubic", cv::INTER_CUBIC); - m_interpMap.emplace("lanczos", cv::INTER_LANCZOS4); - - m_imgWidth = config(L"width"); - m_imgHeight = config(L"height"); - m_imgChannels = config(L"channels"); - - size_t cfeat = m_imgWidth * m_imgHeight * m_imgChannels; - if (cfeat == 0 || cfeat > std::numeric_limits().max() / 2) - RuntimeError("Invalid image dimensions."); - - m_interp.clear(); - std::stringstream ss{config(L"interpolations", "")}; - for (std::string token = ""; std::getline(ss, token, ':');) - { - // Explicit cast required for GCC. - std::transform(token.begin(), token.end(), token.begin(), - (int(*) (int)) std::tolower); - StrToIntMapT::const_iterator res = m_interpMap.find(token); - if (res != m_interpMap.end()) - m_interp.push_back((*res).second); - } - - if (m_interp.size() == 0) - m_interp.push_back(cv::INTER_LINEAR); -} - -StreamDescription SlimScaleTransformer::Transform(const StreamDescription& inputStream) -{ - SlimImageTransformerBase::Transform(inputStream); - m_outputStream.m_sampleLayout = std::make_shared(ImageDimensions(m_imgWidth, m_imgHeight, m_imgChannels).AsTensorShape(HWC)); - return m_outputStream; -} - - -void SlimScaleTransformer::Apply(size_t id, cv::Mat &mat) -{ - UNUSED(id); - - // If matrix has not been converted to the right type, do it now as rescaling - // requires floating point type. - if (mat.type() != CV_MAKETYPE(m_imageElementType, m_imgChannels)) - { - mat.convertTo(mat, m_imageElementType); - } - - auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create( - [seed]() - { - return std::make_unique(seed); - }); - - - auto index = UniIntT(0, static_cast(m_interp.size()) - 1)(*rng); - assert(m_interp.size() > 0); - cv::resize( - mat, mat, - cv::Size(static_cast(m_imgWidth), static_cast(m_imgHeight)), 0, - 0, m_interp[index]); - - m_rngs.push(std::move(rng)); -} - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -SlimMeanTransformer::SlimMeanTransformer(const ConfigParameters& config) : SlimImageTransformerBase(config) -{ - std::wstring meanFile = config(L"meanFile", L""); - if (meanFile.empty()) - m_meanImg.release(); - else - { - cv::FileStorage fs; - // REVIEW alexeyk: this sort of defeats the purpose of using wstring at - // all... [fseide] no, only OpenCV has this problem. 
- fs.open(msra::strfun::utf8(meanFile).c_str(), cv::FileStorage::READ); - if (!fs.isOpened()) - RuntimeError("Could not open file: %ls", meanFile.c_str()); - fs["MeanImg"] >> m_meanImg; - int cchan; - fs["Channel"] >> cchan; - int crow; - fs["Row"] >> crow; - int ccol; - fs["Col"] >> ccol; - if (cchan * crow * ccol != - m_meanImg.channels() * m_meanImg.rows * m_meanImg.cols) - RuntimeError("Invalid data in file: %ls", meanFile.c_str()); - fs.release(); - m_meanImg = m_meanImg.reshape(cchan, crow); - } -} - -void SlimMeanTransformer::Apply(size_t id, cv::Mat &mat) -{ - UNUSED(id); - assert(m_meanImg.size() == cv::Size(0, 0) || - (m_meanImg.size() == mat.size() && - m_meanImg.channels() == mat.channels())); - - // REVIEW alexeyk: check type conversion (float/double). - if (m_meanImg.size() == mat.size()) - { - mat = mat - m_meanImg; - } -} - -SlimTransposeTransformer::SlimTransposeTransformer(const ConfigParameters&) -{ -} - -StreamDescription SlimTransposeTransformer::Transform(const StreamDescription& inputStream) -{ - m_inputStream = inputStream; - if (m_inputStream.m_storageType != StorageType::dense) - { - LogicError("Transpose transformer supports only dense streams."); - } - - // Changing from NHWC to NCHW - ImageDimensions dimensions(*m_inputStream.m_sampleLayout, HWC); - m_outputStream = m_inputStream; - m_outputStream.m_sampleLayout = std::make_shared(dimensions.AsTensorShape(CHW)); - return m_outputStream; -} - -// Transformation of the sequence. -SequenceDataPtr SlimTransposeTransformer::Transform(SequenceDataPtr sequence) -{ - if (m_inputStream.m_elementType == ElementType::tdouble) - { - return TypedTransform(sequence); - } - - if (m_inputStream.m_elementType == ElementType::tfloat) - { - return TypedTransform(sequence); - } - - RuntimeError("Unsupported type"); -} - -// The class represents a sequence that owns an internal data buffer. -// Passed from the TransposeTransformer. -// TODO: Trasposition potentially could be done in place. 
-struct DenseSequenceWithBuffer : DenseSequenceData -{ - std::vector m_buffer; -}; - -template -SequenceDataPtr SlimTransposeTransformer::TypedTransform(SequenceDataPtr sequence) -{ - auto inputSequence = static_cast(*sequence); - assert(inputSequence.m_numberOfSamples == 1); - - size_t count = m_inputStream.m_sampleLayout->GetNumElements() * GetSizeByType(m_inputStream.m_elementType); - - auto result = std::make_shared(); - result->m_buffer.resize(count); - - ImageDimensions dimensions(*m_inputStream.m_sampleLayout, ImageLayoutKind::HWC); - size_t rowCount = dimensions.m_height * dimensions.m_width; - size_t channelCount = dimensions.m_numChannels; - - auto src = reinterpret_cast(inputSequence.m_data); - auto dst = reinterpret_cast(result->m_buffer.data()); - - for (size_t irow = 0; irow < rowCount; irow++) - { - for (size_t icol = 0; icol < channelCount; icol++) - { - dst[icol * rowCount + irow] = src[irow * channelCount + icol]; - } - } - - result->m_sampleLayout = m_outputStream.m_sampleLayout; - result->m_data = result->m_buffer.data(); - result->m_numberOfSamples = inputSequence.m_numberOfSamples; - return result; -} - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -SlimIntensityTransformer::SlimIntensityTransformer(const ConfigParameters &config) : SlimImageTransformerBase(config) -{ - m_stdDev = config(L"intensityStdDev", ConfigParameters::Array(doubleargvector(vector{0.0}))); - std::wstring intFile = config(L"intensityFile", L""); - if (intFile.empty()) - { - m_eigVal.release(); - m_eigVec.release(); - } - else - { - cv::FileStorage fs; - fs.open(msra::strfun::utf8(intFile).c_str(), cv::FileStorage::READ); - if (!fs.isOpened()) - RuntimeError("Could not open file: %ls", intFile.c_str()); - fs["EigVal"] >> m_eigVal; - if (m_eigVal.rows != 1 || m_eigVal.cols != 3 || m_eigVal.channels() != 1) - RuntimeError("Invalid EigVal data in file: %ls", intFile.c_str()); - fs["EigVec"] >> m_eigVec; - if (m_eigVec.rows != 3 || m_eigVec.cols != 3 || m_eigVec.channels() != 1) - RuntimeError("Invalid EigVec data in file: %ls", intFile.c_str()); - fs.release(); - } -} - -void SlimIntensityTransformer::StartEpoch(const EpochConfiguration &config) -{ - m_curStdDev = m_stdDev[config.m_epochIndex]; -} - -void SlimIntensityTransformer::Apply(size_t id, cv::Mat &mat) -{ - UNUSED(id); - - if (m_eigVal.empty() || m_eigVec.empty() || m_curStdDev == 0) - return; - - if (mat.type() == CV_64FC(mat.channels())) - Apply(mat); - else if (mat.type() == CV_32FC(mat.channels())) - Apply(mat); - else - RuntimeError("Unsupported type"); -} - -template -void SlimIntensityTransformer::Apply(cv::Mat &mat) -{ - auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); - - // Using single precision as EigVal and EigVec matrices are single precision. - std::normal_distribution d(0, (float)m_curStdDev); - cv::Mat alphas(1, 3, CV_32FC1); - assert(m_eigVal.rows == 1 && m_eigVec.cols == 3); - alphas.at(0) = d(*rng) * m_eigVal.at(0); - alphas.at(1) = d(*rng) * m_eigVal.at(1); - alphas.at(2) = d(*rng) * m_eigVal.at(2); - m_rngs.push(std::move(rng)); - - assert(m_eigVec.rows == 3 && m_eigVec.cols == 3); - - cv::Mat shifts = m_eigVec * alphas.t(); - - // For multi-channel images data is in BGR format. 
- size_t cdst = mat.rows * mat.cols * mat.channels(); - ElemType* pdstBase = reinterpret_cast(mat.data); - for (ElemType* pdst = pdstBase; pdst < pdstBase + cdst;) - { - for (int c = 0; c < mat.channels(); c++) - { - float shift = shifts.at(mat.channels() - c - 1); - *pdst = std::min(std::max(*pdst + shift, (ElemType)0), (ElemType)255); - pdst++; - } - } -} - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -SlimColorTransformer::SlimColorTransformer(const ConfigParameters &config) : SlimImageTransformerBase(config) -{ - m_brightnessRadius = config(L"brightnessRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); - m_contrastRadius = config(L"contrastRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); - m_saturationRadius = config(L"saturationRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); -} - -void SlimColorTransformer::StartEpoch(const EpochConfiguration &config) -{ - m_curBrightnessRadius = m_brightnessRadius[config.m_epochIndex]; - if (!(0 <= m_curBrightnessRadius && m_curBrightnessRadius <= 1.0)) - InvalidArgument("brightnessRadius must be >= 0.0 and <= 1.0"); - - m_curContrastRadius = m_contrastRadius[config.m_epochIndex]; - if (!(0 <= m_curContrastRadius && m_curContrastRadius <= 1.0)) - InvalidArgument("contrastRadius must be >= 0.0 and <= 1.0"); - - m_curSaturationRadius = m_saturationRadius[config.m_epochIndex]; - if (!(0 <= m_curSaturationRadius && m_curSaturationRadius <= 1.0)) - InvalidArgument("saturationRadius must be >= 0.0 and <= 1.0"); -} - -void SlimColorTransformer::Apply(size_t id, cv::Mat &mat) -{ - UNUSED(id); - - if (m_curBrightnessRadius == 0 && m_curContrastRadius == 0 && m_curSaturationRadius == 0) - return; - - if (mat.type() == CV_64FC(mat.channels())) - Apply(mat); - else if (mat.type() == CV_32FC(mat.channels())) - Apply(mat); - else - RuntimeError("Unsupported type"); -} - -template -void SlimColorTransformer::Apply(cv::Mat &mat) -{ - auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); - - if (m_curBrightnessRadius > 0 || m_curContrastRadius > 0) - { - // To change brightness and/or contrast the following standard transformation is used: - // Xij = alpha * Xij + beta, where - // alpha is a contrast adjustment and beta - brightness adjustment. - ElemType beta = 0; - if (m_curBrightnessRadius > 0) - { - UniRealT d(-m_curBrightnessRadius, m_curBrightnessRadius); - // Compute mean value of the image. - cv::Scalar imgMean = cv::sum(cv::sum(mat)); - // Compute beta as a fraction of the mean. - beta = (ElemType)(d(*rng) * imgMean[0] / (mat.rows * mat.cols * mat.channels())); - } - - ElemType alpha = 1; - if (m_curContrastRadius > 0) - { - UniRealT d(-m_curContrastRadius, m_curContrastRadius); - alpha = (ElemType)(1 + d(*rng)); - } - - // Could potentially use mat.convertTo(mat, -1, alpha, beta) - // but it does not do range checking for single/double precision matrix. saturate_cast won't work either. 
- size_t count = mat.rows * mat.cols * mat.channels(); - ElemType* pbase = reinterpret_cast(mat.data); - for (ElemType* p = pbase; p < pbase + count; p++) - { - *p = std::min(std::max(*p * alpha + beta, (ElemType)0), (ElemType)255); - } - } - - if (m_curSaturationRadius > 0 && mat.channels() == 3) - { - UniRealT d(-m_curSaturationRadius, m_curSaturationRadius); - double ratio = 1.0 + d(*rng); - assert(0 <= ratio && ratio <= 2); - - auto hsv = m_hsvTemp.pop_or_create([]() { return std::make_unique(); }); - - // To change saturation, we need to convert the image to HSV format first, - // the change S channgel and convert the image back to BGR format. - cv::cvtColor(mat, *hsv, CV_BGR2HSV); - assert(hsv->rows == mat.rows && hsv->cols == mat.cols); - size_t count = hsv->rows * hsv->cols * mat.channels(); - ElemType* phsvBase = reinterpret_cast(hsv->data); - for (ElemType* phsv = phsvBase; phsv < phsvBase + count; phsv += 3) - { - const int HsvIndex = 1; - phsv[HsvIndex] = std::min((ElemType)(phsv[HsvIndex] * ratio), (ElemType)1); - } - cv::cvtColor(*hsv, mat, CV_HSV2BGR); - - m_hsvTemp.push(std::move(hsv)); - } - - m_rngs.push(std::move(rng)); -} - - -}}} diff --git a/Source/Readers/ImageReader/ImageSlimTransformers.h b/Source/Readers/ImageReader/ImageSlimTransformers.h deleted file mode 100644 index 9407a9b3a..000000000 --- a/Source/Readers/ImageReader/ImageSlimTransformers.h +++ /dev/null @@ -1,201 +0,0 @@ -// -// Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. -// - -#pragma once - -#include -#include -#include - -#include "Transformer.h" -#include "ConcStack.h" -#include "Config.h" - -namespace Microsoft { namespace MSR { namespace CNTK { - -class ConfigParameters; - -// Base class for image transformations based on OpenCV -// that helps to wrap the sequences into OpenCV::Mat class. -class SlimImageTransformerBase : public SlimTransformer -{ -public: - explicit SlimImageTransformerBase(const ConfigParameters& config); - - void StartEpoch(const EpochConfiguration&) override {} - - // Transformation of the stream. - StreamDescription Transform(const StreamDescription& inputStream) override; - - // Transformation of the sequence. - SequenceDataPtr Transform(SequenceDataPtr sequence) override; - -protected: - // Seed getter. - unsigned int GetSeed() const - { - return m_seed; - } - - using Base = SlimTransformer; - using UniRealT = std::uniform_real_distribution; - using UniIntT = std::uniform_int_distribution; - - // The only function that should be redefined by the inherited classes. - virtual void Apply(size_t id, cv::Mat &from) = 0; - -protected: - StreamDescription m_inputStream; - StreamDescription m_outputStream; - - unsigned int m_seed; - int m_imageElementType; - conc_stack> m_rngs; -}; - -// Crop transformation of the image. -// Can work on images of any size. 
-class SlimCropTransformer : public SlimImageTransformerBase -{ -public: - explicit SlimCropTransformer(const ConfigParameters& config); - -protected: - virtual void Apply(size_t id, cv::Mat &mat) override; - -private: - enum class CropType - { - Center = 0, - Random = 1, - MultiView10 = 2 - }; - enum class RatioJitterType - { - None = 0, - UniRatio = 1, - UniLength = 2, - UniArea = 3 - }; - - CropType ParseCropType(const std::string &src); - RatioJitterType ParseJitterType(const std::string &src); - cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, - std::mt19937 &rng); - - CropType m_cropType; - double m_cropRatioMin; - double m_cropRatioMax; - RatioJitterType m_jitterType; - bool m_hFlip; -}; - -// Scale transformation of the image. -// Scales the image to the dimensions requested by the network. -class SlimScaleTransformer : public SlimImageTransformerBase -{ -public: - explicit SlimScaleTransformer(const ConfigParameters& config); - - StreamDescription Transform(const StreamDescription& inputStream) override; - -private: - virtual void Apply(size_t id, cv::Mat &mat) override; - - using StrToIntMapT = std::unordered_map; - StrToIntMapT m_interpMap; - std::vector m_interp; - - size_t m_imgWidth; - size_t m_imgHeight; - size_t m_imgChannels; -}; - -// Mean transformation. -class SlimMeanTransformer : public SlimImageTransformerBase -{ -public: - explicit SlimMeanTransformer(const ConfigParameters& config); - -private: - virtual void Apply(size_t id, cv::Mat &mat) override; - - cv::Mat m_meanImg; -}; - -// Transpose transformation from HWC to CHW. -class SlimTransposeTransformer : public SlimTransformer -{ -public: - explicit SlimTransposeTransformer(const ConfigParameters& config); - - void StartEpoch(const EpochConfiguration&) override {} - - // Transformation of the stream. - StreamDescription Transform(const StreamDescription& inputStream) override; - - // Transformation of the sequence. - SequenceDataPtr Transform(SequenceDataPtr sequence) override; - -private: - template - SequenceDataPtr TypedTransform(SequenceDataPtr inputSequence); - - StreamDescription m_inputStream; - StreamDescription m_outputStream; -}; - -// Intensity jittering based on PCA transform as described in original AlexNet paper -// (http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) -// Currently uses precomputed values from -// https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua -// but should be replaced with per-class values? -class SlimIntensityTransformer : public SlimImageTransformerBase -{ -public: - explicit SlimIntensityTransformer(const ConfigParameters& config); - - void StartEpoch(const EpochConfiguration &config) override; - void Apply(size_t id, cv::Mat &mat) override; - -private: - template - void Apply(cv::Mat &mat); - - doubleargvector m_stdDev; - double m_curStdDev; - - cv::Mat m_eigVal; - cv::Mat m_eigVec; - - conc_stack> m_rngs; -}; - -// Color jittering transform based on the paper: http://arxiv.org/abs/1312.5402 -// In short, the transform randomly changes contrast, brightness and color of the image. 
-class SlimColorTransformer : public SlimImageTransformerBase -{ -public: - explicit SlimColorTransformer(const ConfigParameters& config); - void StartEpoch(const EpochConfiguration &config) override; - void Apply(size_t id, cv::Mat &mat) override; - -private: - template - void Apply(cv::Mat &mat); - - doubleargvector m_brightnessRadius; - double m_curBrightnessRadius; - doubleargvector m_contrastRadius; - double m_curContrastRadius; - doubleargvector m_saturationRadius; - double m_curSaturationRadius; - - conc_stack> m_rngs; - conc_stack> m_hsvTemp; -}; - - -}}} diff --git a/Source/Readers/ImageReader/ImageTransformers.cpp b/Source/Readers/ImageReader/ImageTransformers.cpp index 271e99575..384390862 100644 --- a/Source/Readers/ImageReader/ImageTransformers.cpp +++ b/Source/Readers/ImageReader/ImageTransformers.cpp @@ -13,8 +13,9 @@ #include "StringUtil.h" #include "ElementTypeUtils.h" -namespace Microsoft { namespace MSR { namespace CNTK -{ +namespace Microsoft { +namespace MSR { +namespace CNTK { struct ImageSequenceData : DenseSequenceData { @@ -23,54 +24,49 @@ struct ImageSequenceData : DenseSequenceData SequenceDataPtr m_original; }; -void ImageTransformerBase::Initialize(TransformerPtr next, - const ConfigParameters &readerConfig) +ImageTransformerBase::ImageTransformerBase(const ConfigParameters& cfg) : m_imageElementType(0) { - Base::Initialize(next, readerConfig); m_imageConfig = std::make_unique(readerConfig); - - m_seed = readerConfig(L"seed", (unsigned int)0); - - size_t featureStreamId = m_imageConfig->GetFeatureStreamId(); - m_appliedStreamIds.push_back(featureStreamId); - if (m_appliedStreamIds.size() != 1) - { - RuntimeError("Only a single feature stream is supported."); - } - - const auto &inputStreams = GetInputStreams(); - m_outputStreams.resize(inputStreams.size()); - std::copy(inputStreams.begin(), inputStreams.end(), m_outputStreams.begin()); + m_seed = cfg(L"seed", 0u); } -SequenceDataPtr -ImageTransformerBase::Apply(SequenceDataPtr sequence, - const StreamDescription &inputStream, - const StreamDescription & /*outputStream*/) +StreamDescription ImageTransformerBase::Transform(const StreamDescription& inputStream) { - assert(inputStream.m_storageType == StorageType::dense); - auto inputSequence = static_cast(*sequence.get()); - ImageDimensions dimensions(*inputSequence.m_sampleLayout, HWC); - int columns = static_cast(dimensions.m_width); - int rows = static_cast(dimensions.m_height); - int channels = static_cast(dimensions.m_numChannels); + m_inputStream = inputStream; + m_outputStream = m_inputStream; - int typeId = 0; - if (inputStream.m_elementType == ElementType::tdouble) + if (m_inputStream.m_storageType != StorageType::dense) { - typeId = CV_64F; + LogicError("ImageTransformerBase supports only dense input streams."); } - else if (inputStream.m_elementType == ElementType::tfloat) + + if (m_inputStream.m_elementType == ElementType::tdouble) { - typeId = CV_32F; + m_imageElementType = CV_64F; + } + else if (m_inputStream.m_elementType == ElementType::tfloat) + { + m_imageElementType = CV_32F; } else { RuntimeError("Unsupported type"); } + return m_outputStream; +} + +SequenceDataPtr ImageTransformerBase::Transform(SequenceDataPtr sequence) +{ + auto inputSequence = static_cast(*sequence); + + ImageDimensions dimensions(*inputSequence.m_sampleLayout, HWC); + int columns = static_cast(dimensions.m_width); + int rows = static_cast(dimensions.m_height); + int channels = static_cast(dimensions.m_numChannels); + auto result = std::make_shared(); - int type = 
CV_MAKETYPE(typeId, channels); + int type = CV_MAKETYPE(m_imageElementType, channels); cv::Mat buffer = cv::Mat(rows, columns, type, inputSequence.m_data); Apply(sequence->m_id, buffer); if (!buffer.isContinuous()) @@ -93,15 +89,7 @@ ImageTransformerBase::Apply(SequenceDataPtr sequence, ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void CropTransformer::Initialize(TransformerPtr next, - const ConfigParameters &readerConfig) -{ - ImageTransformerBase::Initialize(next, readerConfig); - auto featureStreamIds = GetAppliedStreamIds(); - InitFromConfig(readerConfig(GetInputStreams()[featureStreamIds[0]]->m_name)); -} - -void CropTransformer::InitFromConfig(const ConfigParameters &config) +CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransformerBase(config) { floatargvector cropRatio = config(L"cropRatio", "1.0"); m_cropRatioMin = cropRatio[0]; @@ -125,23 +113,16 @@ void CropTransformer::InitFromConfig(const ConfigParameters &config) { m_hFlip = config(L"hflip"); } - - m_aspectRatioRadius = config(L"aspectRatioRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); -} - -void CropTransformer::StartEpoch(const EpochConfiguration &config) -{ - m_curAspectRatioRadius = m_aspectRatioRadius[config.m_epochIndex]; - if (!(0 <= m_curAspectRatioRadius && m_curAspectRatioRadius <= 1.0)) - InvalidArgument("aspectRatioRadius must be >= 0.0 and <= 1.0"); - - ImageTransformerBase::StartEpoch(config); } void CropTransformer::Apply(size_t id, cv::Mat &mat) { auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); + auto rng = m_rngs.pop_or_create( + [seed]() + { + return std::make_unique(seed); + }); double ratio = 1; switch (m_jitterType) @@ -176,6 +157,26 @@ void CropTransformer::Apply(size_t id, cv::Mat &mat) m_rngs.push(std::move(rng)); } +CropTransformer::CropType CropTransformer::ParseCropType(const std::string &src) +{ + if (src.empty() || AreEqualIgnoreCase(src, "center")) + { + return CropType::Center; + } + + if (AreEqualIgnoreCase(src, "random")) + { + return CropType::Random; + } + + if (AreEqualIgnoreCase(src, "multiview10")) + { + return CropType::MultiView10; + } + + RuntimeError("Invalid crop type: %s.", src.c_str()); +} + CropTransformer::RatioJitterType CropTransformer::ParseJitterType(const std::string &src) { if (src.empty() || AreEqualIgnoreCase(src, "none")) @@ -202,50 +203,26 @@ CropTransformer::RatioJitterType CropTransformer::ParseJitterType(const std::str } cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, int ccol, - double cropRatio, std::mt19937 &rng) + double cropRatio, std::mt19937 &rng) { assert(crow > 0); assert(ccol > 0); assert(0 < cropRatio && cropRatio <= 1.0); - // Get square crop size that preserves aspect ratio. - int cropSize = (int)(std::min(crow, ccol) * cropRatio); - int cropSizeX = cropSize; - int cropSizeY = cropSize; - // Change aspect ratio, if this option is enabled. - if (m_curAspectRatioRadius > 0) - { - double factor = 1.0 + UniRealT(-m_curAspectRatioRadius, m_curAspectRatioRadius)(rng); - double area = cropSize * cropSize; - double newArea = area * factor; - if (std::bernoulli_distribution()(rng)) - { - cropSizeX = (int)std::sqrt(newArea); - cropSizeY = (int)(area / cropSizeX); - } - else - { - cropSizeY = (int)std::sqrt(newArea); - cropSizeX = (int)(area / cropSizeY); - } - // This clamping should be ok if jittering ratio is not too big. 
- cropSizeX = std::min(cropSizeX, ccol); - cropSizeY = std::min(cropSizeY, crow); - } - + int cropSize = static_cast(std::min(crow, ccol) * cropRatio); int xOff = -1; int yOff = -1; switch (type) { case CropType::Center: assert(viewIndex == 0); - xOff = (ccol - cropSizeX) / 2; - yOff = (crow - cropSizeY) / 2; + xOff = (ccol - cropSize) / 2; + yOff = (crow - cropSize) / 2; break; case CropType::Random: assert(viewIndex == 0); - xOff = UniIntT(0, ccol - cropSizeX)(rng); - yOff = UniIntT(0, crow - cropSizeY)(rng); + xOff = UniIntT(0, ccol - cropSize)(rng); + yOff = UniIntT(0, crow - cropSize)(rng); break; case CropType::MultiView10: { @@ -254,30 +231,30 @@ cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, in int isubView = viewIndex % 5; switch (isubView) { - // top-left + // top-left case 0: xOff = 0; yOff = 0; break; - // top-right + // top-right case 1: - xOff = ccol - cropSizeX; + xOff = ccol - cropSize; yOff = 0; break; - // bottom-left + // bottom-left case 2: xOff = 0; - yOff = crow - cropSizeY; + yOff = crow - cropSize; break; - // bottom-right + // bottom-right case 3: - xOff = ccol - cropSizeX; - yOff = crow - cropSizeY; + xOff = ccol - cropSize; + yOff = crow - cropSize; break; - // center + // center case 4: - xOff = (ccol - cropSizeX) / 2; - yOff = (crow - cropSizeY) / 2; + xOff = (ccol - cropSize) / 2; + yOff = (crow - cropSize) / 2; break; } break; @@ -286,31 +263,20 @@ cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, in assert(false); } - assert(0 <= xOff && xOff <= ccol - cropSizeX); - assert(0 <= yOff && yOff <= crow - cropSizeY); - return cv::Rect(xOff, yOff, cropSizeX, cropSizeY); + assert(0 <= xOff && xOff <= ccol - cropSize); + assert(0 <= yOff && yOff <= crow - cropSize); + return cv::Rect(xOff, yOff, cropSize, cropSize); } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void ScaleTransformer::Initialize(TransformerPtr next, - const ConfigParameters &readerConfig) +ScaleTransformer::ScaleTransformer(const ConfigParameters& config) : ImageTransformerBase(config) { - ImageTransformerBase::Initialize(next, readerConfig); m_interpMap.emplace("nearest", cv::INTER_NEAREST); m_interpMap.emplace("linear", cv::INTER_LINEAR); m_interpMap.emplace("cubic", cv::INTER_CUBIC); m_interpMap.emplace("lanczos", cv::INTER_LANCZOS4); - auto featureStreamIds = GetAppliedStreamIds(); - const auto &feature = GetInputStreams()[featureStreamIds[0]]; - m_dataType = feature->m_elementType == ElementType::tfloat ? CV_32F : CV_64F; - - InitFromConfig(readerConfig(feature->m_name)); -} - -void ScaleTransformer::InitFromConfig(const ConfigParameters &config) -{ m_imgWidth = config(L"width"); m_imgHeight = config(L"height"); m_imgChannels = config(L"channels"); @@ -325,7 +291,7 @@ void ScaleTransformer::InitFromConfig(const ConfigParameters &config) { // Explicit cast required for GCC. 
std::transform(token.begin(), token.end(), token.begin(), - (int (*) (int)) std::tolower); + (int(*) (int)) std::tolower); StrToIntMapT::const_iterator res = m_interpMap.find(token); if (res != m_interpMap.end()) m_interp.push_back((*res).second); @@ -335,40 +301,46 @@ void ScaleTransformer::InitFromConfig(const ConfigParameters &config) m_interp.push_back(cv::INTER_LINEAR); } +StreamDescription ScaleTransformer::Transform(const StreamDescription& inputStream) +{ + ImageTransformerBase::Transform(inputStream); + m_outputStream.m_sampleLayout = std::make_shared(ImageDimensions(m_imgWidth, m_imgHeight, m_imgChannels).AsTensorShape(HWC)); + return m_outputStream; +} + + void ScaleTransformer::Apply(size_t id, cv::Mat &mat) { UNUSED(id); + // If matrix has not been converted to the right type, do it now as rescaling // requires floating point type. - // - if (mat.type() != CV_MAKETYPE(m_dataType, m_imgChannels)) + if (mat.type() != CV_MAKETYPE(m_imageElementType, m_imgChannels)) { - mat.convertTo(mat, m_dataType); + mat.convertTo(mat, m_imageElementType); } auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); + auto rng = m_rngs.pop_or_create( + [seed]() + { + return std::make_unique(seed); + }); + auto index = UniIntT(0, static_cast(m_interp.size()) - 1)(*rng); assert(m_interp.size() > 0); - - cv::resize(mat, mat, cv::Size((int)m_imgWidth, (int)m_imgHeight), 0, 0, m_interp[index]); + cv::resize( + mat, mat, + cv::Size(static_cast(m_imgWidth), static_cast(m_imgHeight)), 0, + 0, m_interp[index]); m_rngs.push(std::move(rng)); } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void MeanTransformer::Initialize(TransformerPtr next, - const ConfigParameters &readerConfig) -{ - ImageTransformerBase::Initialize(next, readerConfig); - - auto featureStreamIds = GetAppliedStreamIds(); - InitFromConfig(readerConfig(GetInputStreams()[featureStreamIds[0]]->m_name)); -} - -void MeanTransformer::InitFromConfig(const ConfigParameters &config) +MeanTransformer::MeanTransformer(const ConfigParameters& config) : ImageTransformerBase(config) { std::wstring meanFile = config(L"meanFile", L""); if (meanFile.empty()) @@ -401,7 +373,7 @@ void MeanTransformer::Apply(size_t id, cv::Mat &mat) UNUSED(id); assert(m_meanImg.size() == cv::Size(0, 0) || (m_meanImg.size() == mat.size() && - m_meanImg.channels() == mat.channels())); + m_meanImg.channels() == mat.channels())); // REVIEW alexeyk: check type conversion (float/double). if (m_meanImg.size() == mat.size()) @@ -410,48 +382,36 @@ void MeanTransformer::Apply(size_t id, cv::Mat &mat) } } -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void TransposeTransformer::Initialize(TransformerPtr next, - const ConfigParameters &readerConfig) +TransposeTransformer::TransposeTransformer(const ConfigParameters&) { - TransformerBase::Initialize(next, readerConfig); - - // Currently we only support a single stream. 
- ImageConfigHelper config(readerConfig); - size_t featureStreamId = config.GetFeatureStreamId(); - m_appliedStreamIds.push_back(featureStreamId); - - const auto &inputStreams = GetInputStreams(); - m_outputStreams.resize(inputStreams.size()); - std::copy(inputStreams.begin(), inputStreams.end(), m_outputStreams.begin()); - - for (auto id : m_appliedStreamIds) - { - auto &stream = inputStreams[id]; - - ImageDimensions dimensions(*stream->m_sampleLayout, HWC); - - // Changing from NHWC to NCHW (note: row-major notation) - auto changedStream = std::make_shared(*stream); - changedStream->m_sampleLayout = std::make_shared(dimensions.AsTensorShape(CHW)); - m_outputStreams[id] = changedStream; - } } -SequenceDataPtr -TransposeTransformer::Apply(SequenceDataPtr inputSequence, - const StreamDescription &inputStream, - const StreamDescription &outputStream) +StreamDescription TransposeTransformer::Transform(const StreamDescription& inputStream) { - if (inputStream.m_elementType == ElementType::tdouble) + m_inputStream = inputStream; + if (m_inputStream.m_storageType != StorageType::dense) { - return TypedApply(inputSequence, inputStream, outputStream); + LogicError("Transpose transformer supports only dense streams."); } - if (inputStream.m_elementType == ElementType::tfloat) + // Changing from NHWC to NCHW + ImageDimensions dimensions(*m_inputStream.m_sampleLayout, HWC); + m_outputStream = m_inputStream; + m_outputStream.m_sampleLayout = std::make_shared(dimensions.AsTensorShape(CHW)); + return m_outputStream; +} + +// Transformation of the sequence. +SequenceDataPtr TransposeTransformer::Transform(SequenceDataPtr sequence) +{ + if (m_inputStream.m_elementType == ElementType::tdouble) { - return TypedApply(inputSequence, inputStream, outputStream); + return TypedTransform(sequence); + } + + if (m_inputStream.m_elementType == ElementType::tfloat) + { + return TypedTransform(sequence); } RuntimeError("Unsupported type"); @@ -459,28 +419,24 @@ TransposeTransformer::Apply(SequenceDataPtr inputSequence, // The class represents a sequence that owns an internal data buffer. // Passed from the TransposeTransformer. -// TODO: Transposition potentially could be done in place (alexeyk: performance might be much worse than of out-of-place transpose). +// TODO: Trasposition potentially could be done in place. 
struct DenseSequenceWithBuffer : DenseSequenceData { std::vector m_buffer; }; template -SequenceDataPtr TransposeTransformer::TypedApply(SequenceDataPtr sequence, - const StreamDescription &inputStream, - const StreamDescription &outputStream) +SequenceDataPtr TransposeTransformer::TypedTransform(SequenceDataPtr sequence) { - assert(inputStream.m_storageType == StorageType::dense); - auto inputSequence = static_cast(*sequence.get()); + auto inputSequence = static_cast(*sequence); assert(inputSequence.m_numberOfSamples == 1); - assert(inputStream.m_sampleLayout->GetNumElements() == outputStream.m_sampleLayout->GetNumElements()); - size_t count = inputStream.m_sampleLayout->GetNumElements() * GetSizeByType(inputStream.m_elementType); + size_t count = m_inputStream.m_sampleLayout->GetNumElements() * GetSizeByType(m_inputStream.m_elementType); auto result = std::make_shared(); result->m_buffer.resize(count); - ImageDimensions dimensions(*inputStream.m_sampleLayout, ImageLayoutKind::HWC); + ImageDimensions dimensions(*m_inputStream.m_sampleLayout, ImageLayoutKind::HWC); size_t rowCount = dimensions.m_height * dimensions.m_width; size_t channelCount = dimensions.m_numChannels; @@ -495,7 +451,7 @@ SequenceDataPtr TransposeTransformer::TypedApply(SequenceDataPtr sequence, } } - result->m_sampleLayout = outputStream.m_sampleLayout; + result->m_sampleLayout = m_outputStream.m_sampleLayout; result->m_data = result->m_buffer.data(); result->m_numberOfSamples = inputSequence.m_numberOfSamples; return result; @@ -503,16 +459,7 @@ SequenceDataPtr TransposeTransformer::TypedApply(SequenceDataPtr sequence, ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void IntensityTransformer::Initialize(TransformerPtr next, - const ConfigParameters &readerConfig) -{ - ImageTransformerBase::Initialize(next, readerConfig); - - auto featureStreamIds = GetAppliedStreamIds(); - InitFromConfig(readerConfig(GetInputStreams()[featureStreamIds[0]]->m_name)); -} - -void IntensityTransformer::InitFromConfig(const ConfigParameters &config) +IntensityTransformer::IntensityTransformer(const ConfigParameters &config) : ImageTransformerBase(config) { m_stdDev = config(L"intensityStdDev", ConfigParameters::Array(doubleargvector(vector{0.0}))); std::wstring intFile = config(L"intensityFile", L""); @@ -540,8 +487,6 @@ void IntensityTransformer::InitFromConfig(const ConfigParameters &config) void IntensityTransformer::StartEpoch(const EpochConfiguration &config) { m_curStdDev = m_stdDev[config.m_epochIndex]; - - ImageTransformerBase::StartEpoch(config); } void IntensityTransformer::Apply(size_t id, cv::Mat &mat) @@ -563,7 +508,7 @@ template void IntensityTransformer::Apply(cv::Mat &mat) { auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); } ); + auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); // Using single precision as EigVal and EigVec matrices are single precision. 
std::normal_distribution d(0, (float)m_curStdDev); @@ -594,15 +539,7 @@ void IntensityTransformer::Apply(cv::Mat &mat) ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void ColorTransformer::Initialize(TransformerPtr next, const ConfigParameters &readerConfig) -{ - ImageTransformerBase::Initialize(next, readerConfig); - - auto featureStreamIds = GetAppliedStreamIds(); - InitFromConfig(readerConfig(GetInputStreams()[featureStreamIds[0]]->m_name)); -} - -void ColorTransformer::InitFromConfig(const ConfigParameters &config) +ColorTransformer::ColorTransformer(const ConfigParameters &config) : ImageTransformerBase(config) { m_brightnessRadius = config(L"brightnessRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); m_contrastRadius = config(L"contrastRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); @@ -622,8 +559,6 @@ void ColorTransformer::StartEpoch(const EpochConfiguration &config) m_curSaturationRadius = m_saturationRadius[config.m_epochIndex]; if (!(0 <= m_curSaturationRadius && m_curSaturationRadius <= 1.0)) InvalidArgument("saturationRadius must be >= 0.0 and <= 1.0"); - - ImageTransformerBase::StartEpoch(config); } void ColorTransformer::Apply(size_t id, cv::Mat &mat) @@ -706,4 +641,5 @@ void ColorTransformer::Apply(cv::Mat &mat) m_rngs.push(std::move(rng)); } + }}} diff --git a/Source/Readers/ImageReader/ImageTransformers.h b/Source/Readers/ImageReader/ImageTransformers.h index a05af85b0..4de6c5dc4 100644 --- a/Source/Readers/ImageReader/ImageTransformers.h +++ b/Source/Readers/ImageReader/ImageTransformers.h @@ -11,7 +11,6 @@ #include "Transformer.h" #include "ConcStack.h" -#include "TransformerBase.h" #include "Config.h" #include "ImageConfigHelper.h" @@ -21,48 +20,41 @@ class ConfigParameters; // Base class for image transformations based on OpenCV // that helps to wrap the sequences into OpenCV::Mat class. -class ImageTransformerBase : public TransformerBase +class ImageTransformerBase : public Transformer { public: - // Initializes the transformer. - void Initialize(TransformerPtr next, const ConfigParameters &readerConfig) override; + explicit ImageTransformerBase(const ConfigParameters& config); + + void StartEpoch(const EpochConfiguration&) override {} + + // Transformation of the stream. + StreamDescription Transform(const StreamDescription& inputStream) override; + + // Transformation of the sequence. + SequenceDataPtr Transform(SequenceDataPtr sequence) override; protected: - const std::vector &GetAppliedStreamIds() const override - { - return m_appliedStreamIds; - } - - const std::vector& GetOutputStreams() const override - { - return m_outputStreams; - } - // Seed getter. unsigned int GetSeed() const { return m_seed; } - using Base = TransformerBase; + using Base = Transformer; using UniRealT = std::uniform_real_distribution; using UniIntT = std::uniform_int_distribution; - // Applies transformation to the sequence. - SequenceDataPtr Apply(SequenceDataPtr inputSequence, - const StreamDescription &inputStream, - const StreamDescription &outputStream) override; - // The only function that should be redefined by the inherited classes. 
virtual void Apply(size_t id, cv::Mat &from) = 0; protected: std::unique_ptr m_imageConfig; -private: - std::vector m_outputStreams; - std::vector m_appliedStreamIds; + StreamDescription m_inputStream; + StreamDescription m_outputStream; unsigned int m_seed; + int m_imageElementType; + conc_stack> m_rngs; }; // Crop transformation of the image. @@ -70,10 +62,10 @@ private: class CropTransformer : public ImageTransformerBase { public: - void Initialize(TransformerPtr next, const ConfigParameters &readerConfig) override; + explicit CropTransformer(const ConfigParameters& config); -private: - void Apply(size_t id, cv::Mat &mat) override; +protected: + virtual void Apply(size_t id, cv::Mat &mat) override; private: enum class RatioJitterType @@ -84,20 +76,19 @@ private: UniArea = 3 }; - void InitFromConfig(const ConfigParameters &config); void StartEpoch(const EpochConfiguration &config) override; + CropType ParseCropType(const std::string &src); RatioJitterType ParseJitterType(const std::string &src); - cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, std::mt19937 &rng); + cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, + std::mt19937 &rng); - conc_stack> m_rngs; + CropType m_cropType; double m_cropRatioMin; double m_cropRatioMax; RatioJitterType m_jitterType; bool m_hFlip; - doubleargvector m_aspectRatioRadius; - double m_curAspectRatioRadius; }; // Scale transformation of the image. @@ -105,19 +96,17 @@ private: class ScaleTransformer : public ImageTransformerBase { public: - void Initialize(TransformerPtr next, - const ConfigParameters &readerConfig) override; + explicit ScaleTransformer(const ConfigParameters& config); + + StreamDescription Transform(const StreamDescription& inputStream) override; private: - void InitFromConfig(const ConfigParameters &config); - void Apply(size_t id, cv::Mat &mat) override; + virtual void Apply(size_t id, cv::Mat &mat) override; using StrToIntMapT = std::unordered_map; StrToIntMapT m_interpMap; std::vector m_interp; - conc_stack> m_rngs; - int m_dataType; size_t m_imgWidth; size_t m_imgHeight; size_t m_imgChannels; @@ -127,45 +116,34 @@ private: class MeanTransformer : public ImageTransformerBase { public: - void Initialize(TransformerPtr next, - const ConfigParameters &readerConfig) override; + explicit MeanTransformer(const ConfigParameters& config); private: - void Apply(size_t id, cv::Mat &mat) override; - void InitFromConfig(const ConfigParameters &config); + virtual void Apply(size_t id, cv::Mat &mat) override; cv::Mat m_meanImg; }; -// Transpose transformation from HWC to CHW (note: row-major notation). -class TransposeTransformer : public TransformerBase +// Transpose transformation from HWC to CHW. +class TransposeTransformer : public Transformer { public: - void Initialize(TransformerPtr next, const ConfigParameters &readerConfig) override; + explicit TransposeTransformer(const ConfigParameters& config); -protected: - const std::vector& GetAppliedStreamIds() const override - { - return m_appliedStreamIds; - } + void StartEpoch(const EpochConfiguration&) override {} - const std::vector& GetOutputStreams() const override - { - return m_outputStreams; - } + // Transformation of the stream. + StreamDescription Transform(const StreamDescription& inputStream) override; - SequenceDataPtr Apply(SequenceDataPtr inputSequence, - const StreamDescription &inputStream, - const StreamDescription &outputStream) override; + // Transformation of the sequence. 
+ SequenceDataPtr Transform(SequenceDataPtr sequence) override; private: template - SequenceDataPtr TypedApply(SequenceDataPtr inputSequence, - const StreamDescription &inputStream, - const StreamDescription &outputStream); + SequenceDataPtr TypedTransform(SequenceDataPtr inputSequence); - std::vector m_outputStreams; - std::vector m_appliedStreamIds; + StreamDescription m_inputStream; + StreamDescription m_outputStream; }; // Intensity jittering based on PCA transform as described in original AlexNet paper @@ -176,14 +154,12 @@ private: class IntensityTransformer : public ImageTransformerBase { public: - void Initialize(TransformerPtr next, const ConfigParameters &readerConfig) override; - -private: - void InitFromConfig(const ConfigParameters &config); + explicit IntensityTransformer(const ConfigParameters& config); void StartEpoch(const EpochConfiguration &config) override; - void Apply(size_t id, cv::Mat &mat) override; + +private: template void Apply(cv::Mat &mat); @@ -201,14 +177,11 @@ private: class ColorTransformer : public ImageTransformerBase { public: - void Initialize(TransformerPtr next, const ConfigParameters &readerConfig) override; + explicit ColorTransformer(const ConfigParameters& config); + void StartEpoch(const EpochConfiguration &config) override; + void Apply(size_t id, cv::Mat &mat) override; private: - void InitFromConfig(const ConfigParameters &config); - - void StartEpoch(const EpochConfiguration &config) override; - - void Apply(size_t id, cv::Mat &mat) override; template void Apply(cv::Mat &mat); @@ -223,4 +196,5 @@ private: conc_stack> m_hsvTemp; }; + }}} diff --git a/Source/Readers/ReaderLib/BlockRandomizer.h b/Source/Readers/ReaderLib/BlockRandomizer.h index 74a92037a..d4817d946 100644 --- a/Source/Readers/ReaderLib/BlockRandomizer.h +++ b/Source/Readers/ReaderLib/BlockRandomizer.h @@ -7,7 +7,7 @@ #include -#include "Transformer.h" +#include "SequenceEnumerator.h" #include "DataDeserializer.h" #include "ChunkRandomizer.h" #include "SequenceRandomizer.h" @@ -32,7 +32,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // Actual randomization happens in ChunkRandomizer and SequenceRandomizer. // TODO: The behavior can be simplified by only randomizing sequences forward. // TODO: The layering will be changed, when we move transformers under the randomizer, it won't be a transformer anymore. -class BlockRandomizer : public Transformer +class BlockRandomizer : public SequenceEnumerator { public: // Currently, decimation based on sequences or chunks is supported. @@ -50,8 +50,6 @@ public: bool useLegacyRandomization = false, bool multithreadedGetNextSequences = false); - virtual void Initialize(TransformerPtr, const ConfigParameters&) override {}; - // Starts a new epoch. 
virtual void StartEpoch(const EpochConfiguration& config) override; diff --git a/Source/Readers/ReaderLib/CompositeTransformer.h b/Source/Readers/ReaderLib/CompositeTransformer.h index 7bf65bec1..0eb9987d6 100644 --- a/Source/Readers/ReaderLib/CompositeTransformer.h +++ b/Source/Readers/ReaderLib/CompositeTransformer.h @@ -8,46 +8,30 @@ #include #include "Transformer.h" +#include "SequenceEnumerator.h" namespace Microsoft { namespace MSR { namespace CNTK { struct Transformation { - SlimTransformerPtr m_transfromer; + TransformerPtr m_transfromer; std::wstring m_streamName; }; -class CompositeTransformer : public Transformer +class TransformController : public SequenceEnumerator { public: - CompositeTransformer(const std::vector& transformations) + TransformController(const std::vector& transformations, SequenceEnumeratorPtr randomizer) + : m_randomizer(randomizer) { - for (const auto& t: transformations) - { - m_transformations.push_back(std::make_pair(t, 0ul)); - } - } - - // Initializes the transformer. - virtual void Initialize(TransformerPtr next, - const ConfigParameters &) override - { - m_next = next; m_chainOfStreamDescriptions.reserve(m_transformations.size() + 1); - std::vector streams = m_next->GetStreamDescriptions(); + std::vector streams = m_randomizer->GetStreamDescriptions(); m_chainOfStreamDescriptions.push_back(streams); - for (auto& t : m_transformations) + for (auto& t : transformations) { - // filling in stream id for the transform - for (const auto& s: streams) - { - if (s->m_name == t.first.m_streamName) - { - t.second = s->m_id; - } - } - - streams[t.second] = std::make_shared(t.first.m_transfromer->Transform(*streams[t.second])); + size_t streamId = GetStreamId(t.m_streamName, streams); + m_transformations.push_back(std::make_pair(t, streamId)); + streams[streamId] = std::make_shared(t.m_transfromer->Transform(*streams[streamId])); m_chainOfStreamDescriptions.push_back(streams); } } @@ -60,7 +44,8 @@ public: { t.first.m_transfromer->StartEpoch(config); } - m_next->StartEpoch(config); + + m_randomizer->StartEpoch(config); } // Description of streams that the transformer provides. 
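For illustration only (not part of these patches): the TransformController above drives objects that implement the slimmed-down Transformer contract introduced by this series (a constructor taking the transform's config section, StartEpoch, and the two Transform overloads) instead of the old chained Initialize/GetNextSequences scheme. A minimal, hypothetical pass-through transformer written against that contract would look roughly like this; real image transforms additionally derive from ImageTransformerBase, which supplies the cv::Mat plumbing:

    #include "Config.h"
    #include "Transformer.h"

    namespace Microsoft { namespace MSR { namespace CNTK {

    // Hypothetical example: the smallest possible transformer under the new interface.
    // It changes neither the stream description nor the sequence data.
    class IdentityTransformer : public Transformer
    {
    public:
        explicit IdentityTransformer(const ConfigParameters&) {}

        // Nothing epoch-dependent to reconfigure.
        void StartEpoch(const EpochConfiguration&) override {}

        // The output stream keeps the input layout, element type and storage.
        StreamDescription Transform(const StreamDescription& inputStream) override { return inputStream; }

        // Sequences pass through untouched.
        SequenceDataPtr Transform(SequenceDataPtr sequence) override { return sequence; }
    };

    }}}

TransformController pairs such an object with a stream name (the Transformation struct above), rewrites only that stream's description once in its constructor, and forwards StartEpoch and GetNextSequences to the wrapped SequenceEnumerator.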
@@ -74,7 +59,7 @@ public: virtual Sequences GetNextSequences(size_t sampleCount) override { assert(m_next != nullptr); - Sequences sequences = m_next->GetNextSequences(sampleCount); + Sequences sequences = m_randomizer->GetNextSequences(sampleCount); if (sequences.m_data.empty()) { return sequences; @@ -93,7 +78,21 @@ public: } private: - TransformerPtr m_next; + size_t GetStreamId(const std::wstring streamName, const std::vector& streams) const + { + for (const auto& s : streams) + { + if (s->m_name == streamName) + { + return s->m_id; + } + } + + assert(false); + LogicError("Unexpected stream specifed for transformation."); + } + + SequenceEnumeratorPtr m_randomizer; std::vector> m_transformations; std::vector> m_chainOfStreamDescriptions; }; diff --git a/Source/Readers/ReaderLib/FramePacker.h b/Source/Readers/ReaderLib/FramePacker.h index 89723f6cb..2990051f7 100644 --- a/Source/Readers/ReaderLib/FramePacker.h +++ b/Source/Readers/ReaderLib/FramePacker.h @@ -15,12 +15,10 @@ class FramePacker : public SequencePacker public: FramePacker( MemoryProviderPtr memoryProvider, - TransformerPtr transformer, + SequenceEnumeratorPtr sequenceEnumerator, const std::vector& streams) : - SequencePacker(memoryProvider, transformer, streams) - { - - } + SequencePacker(memoryProvider, sequenceEnumerator, streams) + {} private: diff --git a/Source/Readers/ReaderLib/NoRandomizer.cpp b/Source/Readers/ReaderLib/NoRandomizer.cpp index 8e16bc4ef..fc0a8d0ea 100644 --- a/Source/Readers/ReaderLib/NoRandomizer.cpp +++ b/Source/Readers/ReaderLib/NoRandomizer.cpp @@ -42,10 +42,6 @@ NoRandomizer::NoRandomizer(IDataDeserializerPtr deserializer, bool multithreaded m_totalNumberOfSamples = sampleCount; } -void NoRandomizer::Initialize(TransformerPtr, const ConfigParameters&) -{ -} - size_t NoRandomizer::GetChunkIndexOf(size_t samplePosition) { auto result = std::upper_bound(m_chunkSampleOffset.begin(), m_chunkSampleOffset.end(), samplePosition); diff --git a/Source/Readers/ReaderLib/NoRandomizer.h b/Source/Readers/ReaderLib/NoRandomizer.h index a68e0f8e9..97e866b9f 100644 --- a/Source/Readers/ReaderLib/NoRandomizer.h +++ b/Source/Readers/ReaderLib/NoRandomizer.h @@ -6,10 +6,8 @@ #pragma once #include -#include -#include "Transformer.h" +#include "SequenceEnumerator.h" #include "DataDeserializer.h" -#include "SequenceRandomizer.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -18,12 +16,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { // TODO: currently this code moved from the old block randomizer. // TODO: The class will be further refactored and common based will be extracted with BlockRandomizer. // TODO: This layering will be changed, when we move transformers under the randomizer, it won't be a transformer anymore. 
-class NoRandomizer : public Transformer +class NoRandomizer : public SequenceEnumerator { public: NoRandomizer(IDataDeserializerPtr deserializer, bool multithreadedGetNextSequences = false); - virtual void Initialize(TransformerPtr next, const ConfigParameters& readerConfig) override; virtual void StartEpoch(const EpochConfiguration& config) override; virtual Sequences GetNextSequences(size_t sampleCount) override; virtual std::vector GetStreamDescriptions() const override diff --git a/Source/Readers/ReaderLib/PackerBase.cpp b/Source/Readers/ReaderLib/PackerBase.cpp index c629af509..324f1f9ba 100644 --- a/Source/Readers/ReaderLib/PackerBase.cpp +++ b/Source/Readers/ReaderLib/PackerBase.cpp @@ -34,13 +34,13 @@ void PackerBase::StartEpoch(const EpochConfiguration& config) } PackerBase::PackerBase(MemoryProviderPtr memoryProvider, - TransformerPtr transformer, + SequenceEnumeratorPtr sequenceEnumerator, const std::vector& streams) : - m_transformer(transformer), + m_sequenceEnumerator(sequenceEnumerator), m_minibatchSize(0), m_outputStreamDescriptions(streams) { - m_inputStreamDescriptions = m_transformer->GetStreamDescriptions(); + m_inputStreamDescriptions = sequenceEnumerator->GetStreamDescriptions(); assert(m_inputStreamDescriptions.size() != 0); assert(m_inputStreamDescriptions.size() == m_outputStreamDescriptions.size()); diff --git a/Source/Readers/ReaderLib/PackerBase.h b/Source/Readers/ReaderLib/PackerBase.h index 8b25248f0..c19beb8f3 100644 --- a/Source/Readers/ReaderLib/PackerBase.h +++ b/Source/Readers/ReaderLib/PackerBase.h @@ -7,9 +7,8 @@ #include "Reader.h" #include "MemoryProvider.h" -#include "Transformer.h" +#include "SequenceEnumerator.h" #include "Packer.h" -#include namespace Microsoft { namespace MSR { namespace CNTK { @@ -34,7 +33,7 @@ protected: }; PackerBase(MemoryProviderPtr memoryProvider, - TransformerPtr transformer, + SequenceEnumeratorPtr sequenceEnumerator, const std::vector& streams); typedef std::vector StreamBatch; @@ -57,7 +56,7 @@ protected: // (sampleOffset is equal to the sum of sample sizes of all preceding samples). void PackDenseSample(char* destination, SequenceDataPtr sequence, size_t sampleOffset, size_t sampleSize); - TransformerPtr m_transformer; + SequenceEnumeratorPtr m_sequenceEnumerator; // Input stream descriptions provided by the transformer. std::vector m_outputStreamDescriptions; diff --git a/Source/Readers/ReaderLib/ReaderLib.vcxproj b/Source/Readers/ReaderLib/ReaderLib.vcxproj index 3c4701941..aa8a74458 100644 --- a/Source/Readers/ReaderLib/ReaderLib.vcxproj +++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj @@ -49,6 +49,7 @@ + diff --git a/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters b/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters index c880ccf0b..296c0477c 100644 --- a/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters +++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters @@ -64,12 +64,18 @@ Packers - - Utils - Transformers + + Interfaces + + + Utils + + + Interfaces + diff --git a/Source/Readers/ReaderLib/SequenceEnumerator.h b/Source/Readers/ReaderLib/SequenceEnumerator.h new file mode 100644 index 000000000..74fc88a0d --- /dev/null +++ b/Source/Readers/ReaderLib/SequenceEnumerator.h @@ -0,0 +1,55 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
+// + +#pragma once + +#include +#include "DataDeserializer.h" + +namespace Microsoft { namespace MSR { namespace CNTK { + +class ConfigParameters; + +// Defines a set of sequences. +struct Sequences +{ + Sequences() + : m_endOfEpoch(false) + { + } + + // Data for up to a requested number of sequences. + // Indices in the inner vector have to correspond to the stream IDs + // given by GetStream(). + std::vector> m_data; + + // Indicates whether the epoch ends with the data returned. + bool m_endOfEpoch; +}; + +class SequenceEnumerator; +typedef std::shared_ptr SequenceEnumeratorPtr; + +// Sequence enumerator is used by the packer to get a set of new sequences. +// This interface is internal to CNTK and not exposed to the developers of deserializers/plugins. +class SequenceEnumerator +{ +public: + // Describes streams the transformer produces. + virtual std::vector GetStreamDescriptions() const = 0; + + // Sets current epoch configuration. + virtual void StartEpoch(const EpochConfiguration& config) = 0; + + // Gets next sequences up to a maximum count of samples. + // The return value can be used until the next call to GetNextSequences. + virtual Sequences GetNextSequences(size_t sampleCount) = 0; + + virtual ~SequenceEnumerator() + { + } +}; + +}}} diff --git a/Source/Readers/ReaderLib/SequencePacker.cpp b/Source/Readers/ReaderLib/SequencePacker.cpp index ae597a281..0b99f83da 100644 --- a/Source/Readers/ReaderLib/SequencePacker.cpp +++ b/Source/Readers/ReaderLib/SequencePacker.cpp @@ -38,7 +38,7 @@ MBLayoutPtr SequencePacker::CreateMBLayout(const StreamBatch& batch) Minibatch SequencePacker::ReadMinibatch() { - auto sequences = m_transformer->GetNextSequences(m_minibatchSize); + auto sequences = m_sequenceEnumerator->GetNextSequences(m_minibatchSize); const auto& batch = sequences.m_data; Minibatch minibatch(sequences.m_endOfEpoch); diff --git a/Source/Readers/ReaderLib/SequencePacker.h b/Source/Readers/ReaderLib/SequencePacker.h index 4a85fcea3..29d09d1b1 100644 --- a/Source/Readers/ReaderLib/SequencePacker.h +++ b/Source/Readers/ReaderLib/SequencePacker.h @@ -16,9 +16,9 @@ class SequencePacker : public PackerBase public: SequencePacker( MemoryProviderPtr memoryProvider, - TransformerPtr transformer, + SequenceEnumeratorPtr sequenceEnumerator, const std::vector& streams) : - PackerBase(memoryProvider, transformer, streams) + PackerBase(memoryProvider, sequenceEnumerator, streams) { } diff --git a/Source/Readers/ReaderLib/Transformer.h b/Source/Readers/ReaderLib/Transformer.h index f71398d3e..a48783a8b 100644 --- a/Source/Readers/ReaderLib/Transformer.h +++ b/Source/Readers/ReaderLib/Transformer.h @@ -10,72 +10,28 @@ namespace Microsoft { namespace MSR { namespace CNTK { -class ConfigParameters; - -// Defines a set of sequences. -struct Sequences -{ - Sequences() - : m_endOfEpoch(false) - { - } - - // Data for up to a requested number of sequences. - // Indices in the inner vector have to correspond to the stream IDs - // given by GetStream(). - std::vector> m_data; - - // Indicates whether the epoch ends with the data returned. - bool m_endOfEpoch; -}; - class Transformer; typedef std::shared_ptr TransformerPtr; +// Defines a data transformation interface. +// Transformers are responsible for doing custom transformation of sequences. +// For example for images, there could be scale, crop, or median transformation. class Transformer { public: - // Initialization. - virtual void Initialize( - TransformerPtr next, - const ConfigParameters& readerConfig) = 0; + // Starts a new epoch. 
Some transformers have to change their configuration + // based on the epoch. + virtual void StartEpoch(const EpochConfiguration &config) = 0; - // Describes streams the transformer produces. - virtual std::vector GetStreamDescriptions() const = 0; + // Transforms input stream into output stream. + virtual StreamDescription Transform(const StreamDescription& inputStream) = 0; - // Sets current epoch configuration. - virtual void StartEpoch(const EpochConfiguration& config) = 0; - - // Gets next sequences up to a maximum count of samples. - // The return value can be used until the next call to GetNextSequences. - virtual Sequences GetNextSequences(size_t sampleCount) = 0; + // Transforms input sequences into output sequence. + virtual SequenceDataPtr Transform(SequenceDataPtr sequence) = 0; virtual ~Transformer() { } }; -// Defines a data transformation interface. -// Transformers are responsible for doing custom transformation of sequences. -// For example for images, there could be scale, crop, or median transformation. -class SlimTransformer -{ -public: - // Starts a new epoch. - virtual void StartEpoch(const EpochConfiguration &config) = 0; - - // Transformation of the stream. - virtual StreamDescription Transform(const StreamDescription& inputStream) = 0; - - // Transformation of the sequence. - virtual SequenceDataPtr Transform(SequenceDataPtr sequence) = 0; - - virtual ~SlimTransformer() - { - } -}; - -typedef std::shared_ptr SlimTransformerPtr; - - }}} diff --git a/Source/Readers/ReaderLib/TransformerBase.h b/Source/Readers/ReaderLib/TransformerBase.h index 973b16568..3e49ff562 100644 --- a/Source/Readers/ReaderLib/TransformerBase.h +++ b/Source/Readers/ReaderLib/TransformerBase.h @@ -8,6 +8,7 @@ #include #include "Transformer.h" +#include namespace Microsoft { namespace MSR { namespace CNTK { diff --git a/Source/Readers/ReaderLib/TruncatedBpttPacker.cpp b/Source/Readers/ReaderLib/TruncatedBpttPacker.cpp index 26fb46a1b..162182882 100644 --- a/Source/Readers/ReaderLib/TruncatedBpttPacker.cpp +++ b/Source/Readers/ReaderLib/TruncatedBpttPacker.cpp @@ -107,9 +107,9 @@ struct SequenceBuffer TruncatedBPTTPacker::TruncatedBPTTPacker( MemoryProviderPtr memoryProvider, - TransformerPtr transformer, + SequenceEnumeratorPtr sequenceEnumerator, const vector& streams) - : PackerBase(memoryProvider, transformer, streams), + : PackerBase(memoryProvider, sequenceEnumerator, streams), m_truncationSize(0) { auto sparseOutput = find_if(m_outputStreamDescriptions.begin(), m_outputStreamDescriptions.end(), [](const StreamDescriptionPtr& s){ return s->m_storageType == StorageType::sparse_csc; }); @@ -312,7 +312,7 @@ void TruncatedBPTTPacker::ReadSequencesToSlot(size_t slotIndex) { // We need a single sequence, potentially we can request (m_truncationSize - slot.AvailableNumberOfSamples()) // to be more efficient. In reality the truncation size usually is less the sequence size. 
- auto s = m_transformer->GetNextSequences(1); + auto s = m_sequenceEnumerator->GetNextSequences(1); if (s.m_endOfEpoch) { break; diff --git a/Source/Readers/ReaderLib/TruncatedBpttPacker.h b/Source/Readers/ReaderLib/TruncatedBpttPacker.h index d8c33654c..c28986856 100644 --- a/Source/Readers/ReaderLib/TruncatedBpttPacker.h +++ b/Source/Readers/ReaderLib/TruncatedBpttPacker.h @@ -7,7 +7,6 @@ #include "Reader.h" #include "MemoryProvider.h" -#include "Transformer.h" #include "PackerBase.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -23,7 +22,7 @@ class TruncatedBPTTPacker : public PackerBase public: TruncatedBPTTPacker( MemoryProviderPtr memoryProvider, - TransformerPtr transformer, + SequenceEnumeratorPtr sequenceEnumerator, const std::vector& streams); virtual Minibatch ReadMinibatch() override; From eae3e85807d3c900a7a5a8077ca45c41eb9674f7 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Wed, 4 May 2016 09:36:23 +0200 Subject: [PATCH 35/51] Removing unused function declaration --- Source/Readers/ImageReader/ImageConfigHelper.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/Source/Readers/ImageReader/ImageConfigHelper.h b/Source/Readers/ImageReader/ImageConfigHelper.h index 7bec200c7..689ce8349 100644 --- a/Source/Readers/ImageReader/ImageConfigHelper.h +++ b/Source/Readers/ImageReader/ImageConfigHelper.h @@ -83,7 +83,5 @@ private: CropType m_cropType; }; -std::vector GetSectionsWithParameter(const ConfigParameters& config, const std::string& parameterName); - typedef std::shared_ptr ImageConfigHelperPtr; } } } From 3d1c79faba100f7ac9bbbfffc06681eeb5043957 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Wed, 4 May 2016 10:28:29 +0200 Subject: [PATCH 36/51] Some refactoring --- .../CompositeDataReader.cpp | 1 - .../CompositeDataReader/CompositeDataReader.h | 2 +- .../Readers/ImageReader/ImageConfigHelper.cpp | 20 +--- .../ImageReader/ImageDataDeserializer.cpp | 3 +- Source/Readers/ImageReader/ImageReader.cpp | 2 +- .../ImageReader/ImageReader.vcxproj.filters | 4 +- .../Readers/ImageReader/ImageTransformers.cpp | 105 +++++++++++------- .../Readers/ImageReader/ImageTransformers.h | 24 ++-- Source/Readers/ReaderLib/BlockRandomizer.h | 1 - Source/Readers/ReaderLib/ConfigUtil.h | 11 ++ .../Readers/ReaderLib/DataDeserializerBase.h | 1 - Source/Readers/ReaderLib/NoRandomizer.h | 1 - Source/Readers/ReaderLib/ReaderLib.vcxproj | 3 +- .../ReaderLib/ReaderLib.vcxproj.filters | 9 +- ...iteTransformer.h => TransformController.h} | 0 Source/Readers/ReaderLib/TransformerBase.h | 92 --------------- 16 files changed, 101 insertions(+), 178 deletions(-) rename Source/Readers/ReaderLib/{CompositeTransformer.h => TransformController.h} (100%) delete mode 100644 Source/Readers/ReaderLib/TransformerBase.h diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp index 33f2b50ff..41fd8841e 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp @@ -21,7 +21,6 @@ #include "SequencePacker.h" #include "TruncatedBpttPacker.h" #include "CorpusDescriptor.h" -#include "CompositeTransformer.h" #include "ConfigUtil.h" #include diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.h b/Source/Readers/CompositeDataReader/CompositeDataReader.h index c170f81a2..3398de8e0 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.h +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.h @@ -11,7 +11,7 @@ #include 
"DataReader.h" #include "Reader.h" #include "Transformer.h" -#include "CompositeTransformer.h" +#include "TransformController.h" namespace Microsoft { namespace MSR { namespace CNTK { diff --git a/Source/Readers/ImageReader/ImageConfigHelper.cpp b/Source/Readers/ImageReader/ImageConfigHelper.cpp index 09f33064a..b5af5b489 100644 --- a/Source/Readers/ImageReader/ImageConfigHelper.cpp +++ b/Source/Readers/ImageReader/ImageConfigHelper.cpp @@ -6,28 +6,10 @@ #include "stdafx.h" #include "ImageConfigHelper.h" #include "StringUtil.h" +#include "ConfigUtil.h" namespace Microsoft { namespace MSR { namespace CNTK { -std::vector GetSectionsWithParameter(const ConfigParameters& config, const std::string& parameterName) -{ - std::vector sectionNames; - for (const std::pair& section : config) - { - if (section.second.ExistsCurrent(parameterName)) - { - sectionNames.push_back(section.first); - } - } - - if (sectionNames.empty()) - { - RuntimeError("ImageReader requires %s parameter.", parameterName.c_str()); - } - - return sectionNames; -} - ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config) : m_dataFormat(CHW) { diff --git a/Source/Readers/ImageReader/ImageDataDeserializer.cpp b/Source/Readers/ImageReader/ImageDataDeserializer.cpp index 88141bdc4..72047bf7b 100644 --- a/Source/Readers/ImageReader/ImageDataDeserializer.cpp +++ b/Source/Readers/ImageReader/ImageDataDeserializer.cpp @@ -11,7 +11,8 @@ #include #include "ImageDataDeserializer.h" #include "ImageConfigHelper.h" -#include +#include "StringUtil.h" +#include "ConfigUtil.h" namespace Microsoft { namespace MSR { namespace CNTK { diff --git a/Source/Readers/ImageReader/ImageReader.cpp b/Source/Readers/ImageReader/ImageReader.cpp index 43e9e0bde..6d04827f9 100644 --- a/Source/Readers/ImageReader/ImageReader.cpp +++ b/Source/Readers/ImageReader/ImageReader.cpp @@ -11,7 +11,7 @@ #include "NoRandomizer.h" #include "ImageDataDeserializer.h" #include "FramePacker.h" -#include "CompositeTransformer.h" +#include "TransformController.h" #include #include "ImageTransformers.h" diff --git a/Source/Readers/ImageReader/ImageReader.vcxproj.filters b/Source/Readers/ImageReader/ImageReader.vcxproj.filters index 69ab43763..7a8de5124 100644 --- a/Source/Readers/ImageReader/ImageReader.vcxproj.filters +++ b/Source/Readers/ImageReader/ImageReader.vcxproj.filters @@ -4,11 +4,11 @@ + - @@ -22,11 +22,11 @@ Common\Include + - diff --git a/Source/Readers/ImageReader/ImageTransformers.cpp b/Source/Readers/ImageReader/ImageTransformers.cpp index 384390862..82ff38c2f 100644 --- a/Source/Readers/ImageReader/ImageTransformers.cpp +++ b/Source/Readers/ImageReader/ImageTransformers.cpp @@ -13,9 +13,8 @@ #include "StringUtil.h" #include "ElementTypeUtils.h" -namespace Microsoft { -namespace MSR { -namespace CNTK { +namespace Microsoft { namespace MSR { namespace CNTK +{ struct ImageSequenceData : DenseSequenceData { @@ -24,10 +23,10 @@ struct ImageSequenceData : DenseSequenceData SequenceDataPtr m_original; }; -ImageTransformerBase::ImageTransformerBase(const ConfigParameters& cfg) : m_imageElementType(0) +ImageTransformerBase::ImageTransformerBase(const ConfigParameters& readerConfig) : m_imageElementType(0) { m_imageConfig = std::make_unique(readerConfig); - m_seed = cfg(L"seed", 0u); + m_seed = readerConfig(L"seed", 0u); } StreamDescription ImageTransformerBase::Transform(const StreamDescription& inputStream) @@ -113,16 +112,22 @@ CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransfor { m_hFlip = config(L"hflip"); } + + 
m_aspectRatioRadius = config(L"aspectRatioRadius", ConfigParameters::Array(doubleargvector(vector{0.0}))); +} + +void CropTransformer::StartEpoch(const EpochConfiguration &config) +{ + m_curAspectRatioRadius = m_aspectRatioRadius[config.m_epochIndex]; + if (!(0 <= m_curAspectRatioRadius && m_curAspectRatioRadius <= 1.0)) + InvalidArgument("aspectRatioRadius must be >= 0.0 and <= 1.0"); + ImageTransformerBase::StartEpoch(config); } void CropTransformer::Apply(size_t id, cv::Mat &mat) { auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create( - [seed]() - { - return std::make_unique(seed); - }); + auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); double ratio = 1; switch (m_jitterType) @@ -157,7 +162,8 @@ void CropTransformer::Apply(size_t id, cv::Mat &mat) m_rngs.push(std::move(rng)); } -CropTransformer::CropType CropTransformer::ParseCropType(const std::string &src) +CropTransformer::CropType +CropTransformer::ParseCropType(const std::string &src) { if (src.empty() || AreEqualIgnoreCase(src, "center")) { @@ -177,7 +183,8 @@ CropTransformer::CropType CropTransformer::ParseCropType(const std::string &src) RuntimeError("Invalid crop type: %s.", src.c_str()); } -CropTransformer::RatioJitterType CropTransformer::ParseJitterType(const std::string &src) +CropTransformer::RatioJitterType +CropTransformer::ParseJitterType(const std::string &src) { if (src.empty() || AreEqualIgnoreCase(src, "none")) { @@ -209,20 +216,44 @@ cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, in assert(ccol > 0); assert(0 < cropRatio && cropRatio <= 1.0); - int cropSize = static_cast(std::min(crow, ccol) * cropRatio); + // Get square crop size that preserves aspect ratio. + int cropSize = (int)(std::min(crow, ccol) * cropRatio); + int cropSizeX = cropSize; + int cropSizeY = cropSize; + // Change aspect ratio, if this option is enabled. + if (m_curAspectRatioRadius > 0) + { + double factor = 1.0 + UniRealT(-m_curAspectRatioRadius, m_curAspectRatioRadius)(rng); + double area = cropSize * cropSize; + double newArea = area * factor; + if (std::bernoulli_distribution()(rng)) + { + cropSizeX = (int)std::sqrt(newArea); + cropSizeY = (int)(area / cropSizeX); + } + else + { + cropSizeY = (int)std::sqrt(newArea); + cropSizeX = (int)(area / cropSizeY); + } + // This clamping should be ok if jittering ratio is not too big. 
+ cropSizeX = std::min(cropSizeX, ccol); + cropSizeY = std::min(cropSizeY, crow); + } + int xOff = -1; int yOff = -1; switch (type) { case CropType::Center: assert(viewIndex == 0); - xOff = (ccol - cropSize) / 2; - yOff = (crow - cropSize) / 2; + xOff = (ccol - cropSizeX) / 2; + yOff = (crow - cropSizeY) / 2; break; case CropType::Random: assert(viewIndex == 0); - xOff = UniIntT(0, ccol - cropSize)(rng); - yOff = UniIntT(0, crow - cropSize)(rng); + xOff = UniIntT(0, ccol - cropSizeX)(rng); + yOff = UniIntT(0, crow - cropSizeY)(rng); break; case CropType::MultiView10: { @@ -238,23 +269,23 @@ cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, in break; // top-right case 1: - xOff = ccol - cropSize; + xOff = ccol - cropSizeX; yOff = 0; break; // bottom-left case 2: xOff = 0; - yOff = crow - cropSize; + yOff = crow - cropSizeY; break; // bottom-right case 3: - xOff = ccol - cropSize; - yOff = crow - cropSize; + xOff = ccol - cropSizeX; + yOff = crow - cropSizeY; break; // center case 4: - xOff = (ccol - cropSize) / 2; - yOff = (crow - cropSize) / 2; + xOff = (ccol - cropSizeX) / 2; + yOff = (crow - cropSizeY) / 2; break; } break; @@ -263,9 +294,9 @@ cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, in assert(false); } - assert(0 <= xOff && xOff <= ccol - cropSize); - assert(0 <= yOff && yOff <= crow - cropSize); - return cv::Rect(xOff, yOff, cropSize, cropSize); + assert(0 <= xOff && xOff <= ccol - cropSizeX); + assert(0 <= yOff && yOff <= crow - cropSizeY); + return cv::Rect(xOff, yOff, cropSizeX, cropSizeY); } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -291,7 +322,7 @@ ScaleTransformer::ScaleTransformer(const ConfigParameters& config) : ImageTransf { // Explicit cast required for GCC. std::transform(token.begin(), token.end(), token.begin(), - (int(*) (int)) std::tolower); + (int (*) (int)) std::tolower); StrToIntMapT::const_iterator res = m_interpMap.find(token); if (res != m_interpMap.end()) m_interp.push_back((*res).second); @@ -321,19 +352,11 @@ void ScaleTransformer::Apply(size_t id, cv::Mat &mat) } auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create( - [seed]() - { - return std::make_unique(seed); - }); - + auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); auto index = UniIntT(0, static_cast(m_interp.size()) - 1)(*rng); assert(m_interp.size() > 0); - cv::resize( - mat, mat, - cv::Size(static_cast(m_imgWidth), static_cast(m_imgHeight)), 0, - 0, m_interp[index]); + cv::resize(mat, mat, cv::Size((int)m_imgWidth, (int)m_imgHeight), 0, 0, m_interp[index]); m_rngs.push(std::move(rng)); } @@ -419,7 +442,7 @@ SequenceDataPtr TransposeTransformer::Transform(SequenceDataPtr sequence) // The class represents a sequence that owns an internal data buffer. // Passed from the TransposeTransformer. -// TODO: Trasposition potentially could be done in place. +// TODO: Transposition potentially could be done in place (alexeyk: performance might be much worse than of out-of-place transpose). 
struct DenseSequenceWithBuffer : DenseSequenceData { std::vector m_buffer; @@ -487,6 +510,7 @@ IntensityTransformer::IntensityTransformer(const ConfigParameters &config) : Ima void IntensityTransformer::StartEpoch(const EpochConfiguration &config) { m_curStdDev = m_stdDev[config.m_epochIndex]; + ImageTransformerBase::StartEpoch(config); } void IntensityTransformer::Apply(size_t id, cv::Mat &mat) @@ -508,7 +532,7 @@ template void IntensityTransformer::Apply(cv::Mat &mat) { auto seed = GetSeed(); - auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); }); + auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique(seed); } ); // Using single precision as EigVal and EigVec matrices are single precision. std::normal_distribution d(0, (float)m_curStdDev); @@ -559,6 +583,8 @@ void ColorTransformer::StartEpoch(const EpochConfiguration &config) m_curSaturationRadius = m_saturationRadius[config.m_epochIndex]; if (!(0 <= m_curSaturationRadius && m_curSaturationRadius <= 1.0)) InvalidArgument("saturationRadius must be >= 0.0 and <= 1.0"); + + ImageTransformerBase::StartEpoch(config); } void ColorTransformer::Apply(size_t id, cv::Mat &mat) @@ -641,5 +667,4 @@ void ColorTransformer::Apply(cv::Mat &mat) m_rngs.push(std::move(rng)); } - }}} diff --git a/Source/Readers/ImageReader/ImageTransformers.h b/Source/Readers/ImageReader/ImageTransformers.h index 4de6c5dc4..e7ccc594a 100644 --- a/Source/Readers/ImageReader/ImageTransformers.h +++ b/Source/Readers/ImageReader/ImageTransformers.h @@ -64,8 +64,8 @@ class CropTransformer : public ImageTransformerBase public: explicit CropTransformer(const ConfigParameters& config); -protected: - virtual void Apply(size_t id, cv::Mat &mat) override; +private: + void Apply(size_t id, cv::Mat &mat) override; private: enum class RatioJitterType @@ -76,7 +76,6 @@ private: UniArea = 3 }; - void StartEpoch(const EpochConfiguration &config) override; CropType ParseCropType(const std::string &src); @@ -84,11 +83,14 @@ private: cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, std::mt19937 &rng); + conc_stack> m_rngs; CropType m_cropType; double m_cropRatioMin; double m_cropRatioMax; RatioJitterType m_jitterType; bool m_hFlip; + doubleargvector m_aspectRatioRadius; + double m_curAspectRatioRadius; }; // Scale transformation of the image. 
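Not part of the patch, but worth spelling out: the aspect-ratio jittering added to CropTransformer::GetCropRect above draws a factor from [1 - radius, 1 + radius], keeps the crop area constant (cropSizeX * cropSizeY stays equal to cropSize * cropSize, up to integer truncation), and scales the width-to-height ratio by that factor; the Bernoulli draw decides whether the width or the height gets the longer side. A standalone sketch of just that computation, using a plain std::mt19937 instead of the pooled m_rngs above:

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <random>
    #include <utility>

    // Returns a jittered (width, height) for an initially square crop of size cropSize,
    // mirroring the aspect-ratio branch of CropTransformer::GetCropRect.
    std::pair<int, int> JitterAspectRatio(int cropSize, double radius, int maxW, int maxH, std::mt19937& rng)
    {
        double factor = 1.0 + std::uniform_real_distribution<double>(-radius, radius)(rng);
        double area = static_cast<double>(cropSize) * cropSize;
        double newArea = area * factor;

        int w, h;
        if (std::bernoulli_distribution()(rng))
        {
            w = static_cast<int>(std::sqrt(newArea));
            h = static_cast<int>(area / w);   // area preserved, ratio w:h is roughly 'factor'
        }
        else
        {
            h = static_cast<int>(std::sqrt(newArea));
            w = static_cast<int>(area / h);
        }

        // Clamp to the source image, as in the patch ("ok if jittering ratio is not too big").
        return { std::min(w, maxW), std::min(h, maxH) };
    }

    int main()
    {
        std::mt19937 rng(42);
        auto wh = JitterAspectRatio(/*cropSize*/ 196, /*radius*/ 0.2, /*maxW*/ 224, /*maxH*/ 224, rng);
        std::cout << wh.first << " x " << wh.second << "\n"; // a rectangle with (nearly) the area of 196 x 196
    }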
@@ -101,12 +103,13 @@ public: StreamDescription Transform(const StreamDescription& inputStream) override; private: - virtual void Apply(size_t id, cv::Mat &mat) override; + void Apply(size_t id, cv::Mat &mat) override; using StrToIntMapT = std::unordered_map; StrToIntMapT m_interpMap; std::vector m_interp; + conc_stack> m_rngs; size_t m_imgWidth; size_t m_imgHeight; size_t m_imgChannels; @@ -119,7 +122,7 @@ public: explicit MeanTransformer(const ConfigParameters& config); private: - virtual void Apply(size_t id, cv::Mat &mat) override; + void Apply(size_t id, cv::Mat &mat) override; cv::Mat m_meanImg; }; @@ -156,10 +159,10 @@ class IntensityTransformer : public ImageTransformerBase public: explicit IntensityTransformer(const ConfigParameters& config); - void StartEpoch(const EpochConfiguration &config) override; - void Apply(size_t id, cv::Mat &mat) override; - private: + void StartEpoch(const EpochConfiguration &config) override; + + void Apply(size_t id, cv::Mat &mat) override; template void Apply(cv::Mat &mat); @@ -178,10 +181,11 @@ class ColorTransformer : public ImageTransformerBase { public: explicit ColorTransformer(const ConfigParameters& config); - void StartEpoch(const EpochConfiguration &config) override; - void Apply(size_t id, cv::Mat &mat) override; private: + void StartEpoch(const EpochConfiguration &config) override; + + void Apply(size_t id, cv::Mat &mat) override; template void Apply(cv::Mat &mat); diff --git a/Source/Readers/ReaderLib/BlockRandomizer.h b/Source/Readers/ReaderLib/BlockRandomizer.h index d4817d946..5e1a1ccad 100644 --- a/Source/Readers/ReaderLib/BlockRandomizer.h +++ b/Source/Readers/ReaderLib/BlockRandomizer.h @@ -31,7 +31,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { // This class is responsible for decimation and loading the data chunks in to memory. // Actual randomization happens in ChunkRandomizer and SequenceRandomizer. // TODO: The behavior can be simplified by only randomizing sequences forward. -// TODO: The layering will be changed, when we move transformers under the randomizer, it won't be a transformer anymore. class BlockRandomizer : public SequenceEnumerator { public: diff --git a/Source/Readers/ReaderLib/ConfigUtil.h b/Source/Readers/ReaderLib/ConfigUtil.h index 8252186fe..7f2ed609a 100644 --- a/Source/Readers/ReaderLib/ConfigUtil.h +++ b/Source/Readers/ReaderLib/ConfigUtil.h @@ -26,4 +26,15 @@ inline std::vector TryGetSectionsWithParameter(const ConfigParamete return sectionNames; } +// Helper function to get sections that contains specified parameter. Throws if the parameter does not exist. 
+inline std::vector GetSectionsWithParameter(const ConfigParameters& config, const std::string& parameterName) +{ + auto result = TryGetSectionsWithParameter(config, parameterName); + if (result.empty()) + { + RuntimeError("ImageReader requires %s parameter.", parameterName.c_str()); + } + return result; +} + }}} diff --git a/Source/Readers/ReaderLib/DataDeserializerBase.h b/Source/Readers/ReaderLib/DataDeserializerBase.h index 908eb6f0c..ebcd6c39d 100644 --- a/Source/Readers/ReaderLib/DataDeserializerBase.h +++ b/Source/Readers/ReaderLib/DataDeserializerBase.h @@ -6,7 +6,6 @@ #pragma once #include "DataDeserializer.h" -#include namespace Microsoft { namespace MSR { namespace CNTK { diff --git a/Source/Readers/ReaderLib/NoRandomizer.h b/Source/Readers/ReaderLib/NoRandomizer.h index 97e866b9f..d14497d9b 100644 --- a/Source/Readers/ReaderLib/NoRandomizer.h +++ b/Source/Readers/ReaderLib/NoRandomizer.h @@ -15,7 +15,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { // Used training where the training data has already been pre - randomized. // TODO: currently this code moved from the old block randomizer. // TODO: The class will be further refactored and common based will be extracted with BlockRandomizer. -// TODO: This layering will be changed, when we move transformers under the randomizer, it won't be a transformer anymore. class NoRandomizer : public SequenceEnumerator { public: diff --git a/Source/Readers/ReaderLib/ReaderLib.vcxproj b/Source/Readers/ReaderLib/ReaderLib.vcxproj index aa8a74458..2851023ee 100644 --- a/Source/Readers/ReaderLib/ReaderLib.vcxproj +++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj @@ -44,7 +44,7 @@ - + @@ -53,7 +53,6 @@ - diff --git a/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters b/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters index 296c0477c..dc220e57a 100644 --- a/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters +++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters @@ -28,9 +28,6 @@ Utils - - Transformers - Deserializers @@ -64,9 +61,6 @@ Packers - - Transformers - Interfaces @@ -76,6 +70,9 @@ Interfaces + + Transformers + diff --git a/Source/Readers/ReaderLib/CompositeTransformer.h b/Source/Readers/ReaderLib/TransformController.h similarity index 100% rename from Source/Readers/ReaderLib/CompositeTransformer.h rename to Source/Readers/ReaderLib/TransformController.h diff --git a/Source/Readers/ReaderLib/TransformerBase.h b/Source/Readers/ReaderLib/TransformerBase.h deleted file mode 100644 index 3e49ff562..000000000 --- a/Source/Readers/ReaderLib/TransformerBase.h +++ /dev/null @@ -1,92 +0,0 @@ -// -// Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. -// - -#pragma once - -#include - -#include "Transformer.h" -#include - -namespace Microsoft { namespace MSR { namespace CNTK { - -class TransformerBase : public Transformer -{ -public: - // Initializes the transformer. - virtual void Initialize(TransformerPtr next, - const ConfigParameters &) override - { - m_next = next; - m_inputStreams = m_next->GetStreamDescriptions(); - } - - // Sets configuration for the current epoch. - virtual void StartEpoch(const EpochConfiguration &config) override - { - assert(m_next != nullptr); - m_next->StartEpoch(config); - } - - // Description of streams that the transformer provides. - virtual std::vector GetStreamDescriptions() const override - { - return this->GetOutputStreams(); - } - - // Gets next sequences up to a maximum count of samples. 
- // Sequences contains data for all streams. - virtual Sequences GetNextSequences(size_t sampleCount) override - { - assert(m_next != nullptr); - Sequences samples = m_next->GetNextSequences(sampleCount); - - if (samples.m_data.empty()) - { - return samples; - } - - const auto &appliedStreamIds = GetAppliedStreamIds(); - const auto &outputStreams = GetOutputStreams(); - - // TODO: Move parallelization on the outer loop with collapse. - for (int j = 0; j < appliedStreamIds.size(); ++j) - { - size_t streamId = appliedStreamIds[j]; - auto& allSamples = samples.m_data[streamId]; - -#pragma omp parallel for schedule(dynamic) - for (int i = 0; i < allSamples.size(); ++i) - { - allSamples[i] = Apply(allSamples[i], *m_inputStreams[streamId], *outputStreams[streamId]); - } - } - return samples; - } - -protected: - virtual const std::vector &GetAppliedStreamIds() const = 0; - virtual const std::vector &GetOutputStreams() const - { - return m_inputStreams; - } - - const std::vector &GetInputStreams() - { - return m_inputStreams; - } - -private: - // Applies transformation to the sequence. - virtual SequenceDataPtr Apply(SequenceDataPtr inputSequence, - const StreamDescription &inputStream, - const StreamDescription &outputStream) = 0; - - TransformerPtr m_next; - std::vector m_featureStreamIds; - std::vector m_inputStreams; -}; - -}}} From 33b0c4a8aaf3e32ceec7840ba77e190ec29dcb06 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Wed, 4 May 2016 10:38:17 +0200 Subject: [PATCH 37/51] Adding comments to the configuration --- .../EndToEndTests/Image/AlexNet/AlexNet.cntk | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk index b1ecfa6ee..6436bed34 100644 --- a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk @@ -19,7 +19,7 @@ Train=[ NDLNetworkBuilder=[ networkDescription=$ConfigDir$/AlexNet.ndl ] - + SGD=[ epochSize=0 minibatchSize=16 @@ -46,7 +46,7 @@ Train=[ verbosity = 0 randomize = true - # Currently,f for image reader a single sequence is a chunk + # Currently for image reader a single sequence is a chunk # so setting randomization window to 1. randomizationWindow =1 useLegacy = false @@ -54,7 +54,7 @@ Train=[ # A list of deserializers to use. deserializers = [ [ - type = "ImageDataDeserializer" + type = "ImageDataDeserializer" module = "ImageReader" # Map file which maps images to labels using the following format: @@ -69,17 +69,24 @@ Train=[ transforms=[ [ type="Crop" + # Possible values: Center, Random. Default: Center cropType=Random + # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1. cropRatio=0.875 + # Crop scale ratio jitter type. + # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio jitterType=UniRatio ]:[ type="Scale" width=224 height=224 channels=3 + # Interpolation to use when scaling image to width x height size. + # Possible values: nearest, linear, cubic, lanczos. Default: linear. interpolations=Linear ]:[ type="Mean" + # Stores mean values for each pixel in OpenCV matrix XML format. meanFile=$ConfigDir$/ImageNet1K_mean.xml ]:[ type="Transpose" @@ -95,7 +102,7 @@ Train=[ ] ] -AddTop5Eval=[ +AddTop5Eval=[ action=edit CurModel=$ModelDir$/AlexNet NewModel=$ModelDir$/AlexNet.Top5 @@ -118,8 +125,8 @@ Test=[ # A list of deserializers to use. 
deserializers = [ - [ - type = "ImageDataDeserializer" + [ + type = "ImageDataDeserializer" module = "ImageReader" file=$ConfigDir$/val_map.txt From 6f3a4f346746e0b5d69c8a0327a14cf8f4b0c57b Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Wed, 4 May 2016 13:16:48 +0200 Subject: [PATCH 38/51] Some refactoring --- .../CNTKTextFormatReader.cpp | 8 ++--- .../CNTKTextFormatReader.h | 4 +-- .../CompositeDataReader.cpp | 22 +++++++++---- Source/Readers/ImageReader/Exports.cpp | 22 ++----------- .../ImageReader/ImageDataDeserializer.h | 4 +-- Source/Readers/ImageReader/ImageReader.cpp | 4 +-- .../Readers/ImageReader/ImageTransformers.h | 6 ++-- .../Readers/ReaderLib/TransformController.h | 33 ++++++++++--------- 8 files changed, 48 insertions(+), 55 deletions(-) diff --git a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp index 2fc53233f..2486ba56d 100644 --- a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp +++ b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp @@ -36,17 +36,17 @@ CNTKTextFormatReader::CNTKTextFormatReader(MemoryProviderPtr provider, { // Verbosity is a general config parameter, not specific to the text format reader. int verbosity = config(L"verbosity", 2); - m_sequenceEnumerator = make_shared(verbosity, window, m_deserializer); + m_randomizer = make_shared(verbosity, window, m_deserializer); } else { - m_sequenceEnumerator = std::make_shared(m_deserializer); + m_randomizer = std::make_shared(m_deserializer); } // TODO: add "frameMode" config paramter m_packer = std::make_shared( m_provider, - m_sequenceEnumerator, + m_randomizer, GetStreamDescriptions()); } catch (const std::runtime_error& e) @@ -67,7 +67,7 @@ void CNTKTextFormatReader::StartEpoch(const EpochConfiguration& config) RuntimeError("Epoch size cannot be 0."); } - m_sequenceEnumerator->StartEpoch(config); + m_randomizer->StartEpoch(config); m_packer->StartEpoch(config); } diff --git a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.h b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.h index f9589e96c..4d842b785 100644 --- a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.h +++ b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.h @@ -33,8 +33,8 @@ public: private: IDataDeserializerPtr m_deserializer; - // A head transformer in a list of transformers. - SequenceEnumeratorPtr m_sequenceEnumerator; + // Randomizer. + SequenceEnumeratorPtr m_randomizer; // Packer. PackerPtr m_packer; diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp index 41fd8841e..60620fac3 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp @@ -86,20 +86,22 @@ CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryP // Pick up the randomizer. bool randomize = config(L"randomize", false); + // By default do not use omp threads for deserialization. + bool multiThreadedDeserialization = config(L"multiThreadedDeserialization", false); if (randomize) { // By default randomizing the whole data set. 
size_t randomizationWindow = config(L"randomizationWindow", requestDataSize); - bool useLegacyRandomization = config(L"useLegacy", true); - bool multithreadedGetNextSequences = false; - BlockRandomizer::DecimationMode decimationMode = BlockRandomizer::DecimationMode::chunk; - m_sequenceEnumerator = std::make_shared(verbosity, randomizationWindow, deserializer, decimationMode, useLegacyRandomization, multithreadedGetNextSequences); + // By default using STL random number generator. + bool useLegacyRandomization = config(L"useLegacyRandomization", false); + m_sequenceEnumerator = std::make_shared(verbosity, randomizationWindow, deserializer, BlockRandomizer::DecimationMode::chunk, useLegacyRandomization, multiThreadedDeserialization); } else { - m_sequenceEnumerator = std::make_shared(deserializer); + m_sequenceEnumerator = std::make_shared(deserializer, multiThreadedDeserialization); } + // In case when there are transforms, applying them to the data. m_sequenceEnumerator = m_transforms.empty() ? m_sequenceEnumerator : std::make_shared(m_transforms, m_sequenceEnumerator); @@ -126,13 +128,14 @@ Minibatch CompositeDataReader::ReadMinibatch() return m_packer->ReadMinibatch(); } +// Create deserializers based on the specified configuration. void CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConfig) { argvector deserializerConfigs = readerConfig(L"deserializers", ConfigParameters::Array(argvector(vector {}))); assert(m_deserializers.empty()); - bool primary = true; // CUrrently, the first deserializer becomes primary - it drives chunking. + bool primary = true; // Currently, the first deserializer becomes primary - it drives chunking. for (size_t i = 0; i < deserializerConfigs.size(); ++i) { // TODO: Should go away in the future. Framing can be done on top of deserializers. @@ -146,6 +149,7 @@ void CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConf } } +// Creates a particular deserializer based on the config. IDataDeserializerPtr CompositeDataReader::CreateDeserializer(const ConfigParameters& deserializerConfig, bool primary) { typedef bool(*CreateDeserializerFactory) (IDataDeserializer** d, const std::wstring& type, const ConfigParameters& cfg, CorpusDescriptorPtr corpus, bool primary); @@ -160,13 +164,14 @@ IDataDeserializerPtr CompositeDataReader::CreateDeserializer(const ConfigParamet RuntimeError("Cannot create deserializer. Please check module and type in the configuration."); } + // Create transformers if necessary. CreateTransforms(deserializerConfig); - assert(d != nullptr); return IDataDeserializerPtr(d); } +// Create transformers based on the configuration. void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerConfig) { std::string defaultModule = deserializerConfig("module"); @@ -179,6 +184,7 @@ void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerC LogicError("Only a single 'transforms' config is allowed per stream."); } + // No need to create anything for this stream, skipping. if (inputSections.empty()) { continue; @@ -187,6 +193,7 @@ void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerC ConfigParameters input = inputs[i](inputSections.front()); std::wstring inputName = msra::strfun::utf16(input.ConfigName()); + // Read tranformers in order. 
argvector transforms = input("transforms"); for (size_t j = 0; j < transforms.size(); ++j) { @@ -197,6 +204,7 @@ void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerC } +// Create a transformer for a particular configuration. Loading it from the default module if module is not specified. TransformerPtr CompositeDataReader::CreateTransformer(const ConfigParameters& config, const string& defaultModule) { typedef bool(*TransformerFactory) (Transformer** t, const std::wstring& type, const ConfigParameters& cfg); diff --git a/Source/Readers/ImageReader/Exports.cpp b/Source/Readers/ImageReader/Exports.cpp index 43cf72a65..46fbccc18 100644 --- a/Source/Readers/ImageReader/Exports.cpp +++ b/Source/Readers/ImageReader/Exports.cpp @@ -35,55 +35,39 @@ extern "C" DATAREADER_API void GetReaderD(IDataReader** preader) } // TODO: Not safe from the ABI perspective. Will be uglified to make the interface ABI. +// A factory method for creating image deserializers. extern "C" DATAREADER_API bool CreateDeserializer(IDataDeserializer** deserializer, const std::wstring& type, const ConfigParameters& deserializerConfig, CorpusDescriptorPtr corpus, bool) { if (type == L"ImageDataDeserializer") - { *deserializer = new ImageDataDeserializer(corpus, deserializerConfig); - } else - { // Unknown type. return false; - } // Deserializer created. return true; } +// A factory method for creating image transformers. extern "C" DATAREADER_API bool CreateTransformer(Transformer** transformer, const std::wstring& type, const ConfigParameters& config) { if (type == L"Crop") - { *transformer = new CropTransformer(config); - } else if (type == L"Scale") - { *transformer = new ScaleTransformer(config); - } else if (type == L"Color") - { *transformer = new ColorTransformer(config); - } else if (type == L"Intensity") - { *transformer = new IntensityTransformer(config); - } else if (type == L"Mean") - { *transformer = new MeanTransformer(config); - } else if (type == L"Transpose") - { *transformer = new TransposeTransformer(config); - } else - { // Unknown type. return false; - } - // Deserializer created. + // Transformer created. return true; } diff --git a/Source/Readers/ImageReader/ImageDataDeserializer.h b/Source/Readers/ImageReader/ImageDataDeserializer.h index 265266846..3078a80ce 100644 --- a/Source/Readers/ImageReader/ImageDataDeserializer.h +++ b/Source/Readers/ImageReader/ImageDataDeserializer.h @@ -8,7 +8,6 @@ #include "DataDeserializerBase.h" #include "Config.h" #include "ByteReader.h" -#include "ImageConfigHelper.h" #include #include "CorpusDescriptor.h" @@ -22,9 +21,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { class ImageDataDeserializer : public DataDeserializerBase { public: + // Constructor for compositional configuration. ImageDataDeserializer(CorpusDescriptorPtr corpus, const ConfigParameters& config); - // TODO: This constructor should be deprecated. Compositional config should be used instead. + // TODO: This constructor should be deprecated in the future. Compositional config should be used instead. explicit ImageDataDeserializer(const ConfigParameters& config); // Gets sequences by specified ids. Order of returned sequences corresponds to the order of provided ids. 
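For context (a sketch, not code from this repository): the Exports.cpp changes earlier in this patch turn CreateDeserializer and CreateTransformer into plain C factory functions exported by each reader module, and CompositeDataReader resolves them through the module name given in the configuration ("module = ImageReader") using the TransformerFactory typedef shown above. The calling side looks roughly as follows; "LoadFactory" is a made-up placeholder for the real DLL/.so symbol lookup, which is not part of this excerpt, and the exact ConfigParameters accessors are assumptions:

    // Sketch only; mirrors the TransformerFactory typedef from CompositeDataReader.cpp above.
    typedef bool (*TransformerFactory)(Transformer** t, const std::wstring& type, const ConfigParameters& cfg);

    TransformerPtr CreateTransformerSketch(const ConfigParameters& config, const std::string& defaultModule)
    {
        // A transform section may name its own module; otherwise the owning deserializer's module is used.
        std::string moduleName = config("module", defaultModule.c_str());
        std::wstring type = config(L"type");

        // Hypothetical helper: loads the module and returns the address of its "CreateTransformer" export.
        TransformerFactory factory = LoadFactory<TransformerFactory>(moduleName, "CreateTransformer");

        Transformer* transformer = nullptr;
        if (!factory(&transformer, type, config))
            RuntimeError("Cannot create transformer. Please check module and type in the configuration.");

        return TransformerPtr(transformer);
    }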
diff --git a/Source/Readers/ImageReader/ImageReader.cpp b/Source/Readers/ImageReader/ImageReader.cpp index 6d04827f9..c99c0af08 100644 --- a/Source/Readers/ImageReader/ImageReader.cpp +++ b/Source/Readers/ImageReader/ImageReader.cpp @@ -7,13 +7,13 @@ #include "ImageReader.h" #include "Config.h" #include "ImageConfigHelper.h" +#include "ImageTransformers.h" #include "BlockRandomizer.h" #include "NoRandomizer.h" #include "ImageDataDeserializer.h" #include "FramePacker.h" -#include "TransformController.h" #include -#include "ImageTransformers.h" +#include "TransformController.h" namespace Microsoft { namespace MSR { namespace CNTK { diff --git a/Source/Readers/ImageReader/ImageTransformers.h b/Source/Readers/ImageReader/ImageTransformers.h index e7ccc594a..55065df17 100644 --- a/Source/Readers/ImageReader/ImageTransformers.h +++ b/Source/Readers/ImageReader/ImageTransformers.h @@ -80,8 +80,7 @@ private: CropType ParseCropType(const std::string &src); RatioJitterType ParseJitterType(const std::string &src); - cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, - std::mt19937 &rng); + cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, std::mt19937 &rng); conc_stack> m_rngs; CropType m_cropType; @@ -127,7 +126,7 @@ private: cv::Mat m_meanImg; }; -// Transpose transformation from HWC to CHW. +// Transpose transformation from HWC to CHW (note: row-major notation). class TransposeTransformer : public Transformer { public: @@ -200,5 +199,4 @@ private: conc_stack> m_hsvTemp; }; - }}} diff --git a/Source/Readers/ReaderLib/TransformController.h b/Source/Readers/ReaderLib/TransformController.h index 0eb9987d6..dc603e473 100644 --- a/Source/Readers/ReaderLib/TransformController.h +++ b/Source/Readers/ReaderLib/TransformController.h @@ -12,31 +12,34 @@ namespace Microsoft { namespace MSR { namespace CNTK { +// A pair of a transformer and the stream name to which the transformer should be a applied. struct Transformation { TransformerPtr m_transfromer; std::wstring m_streamName; }; +// A class responsible for applying a list of transformers to sequences and stream descriptions. class TransformController : public SequenceEnumerator { public: - TransformController(const std::vector& transformations, SequenceEnumeratorPtr randomizer) - : m_randomizer(randomizer) + TransformController(const std::vector& transformations, SequenceEnumeratorPtr sequenceProvider) + : m_sequenceProvider(sequenceProvider) { - m_chainOfStreamDescriptions.reserve(m_transformations.size() + 1); - std::vector streams = m_randomizer->GetStreamDescriptions(); - m_chainOfStreamDescriptions.push_back(streams); + // Applying transformations to stream descriptions, + // i.e. a transofrmation can change a stream from dense to sparse. + std::vector transformedStreams = m_sequenceProvider->GetStreamDescriptions(); for (auto& t : transformations) { - size_t streamId = GetStreamId(t.m_streamName, streams); + size_t streamId = GetStreamId(t.m_streamName, transformedStreams); m_transformations.push_back(std::make_pair(t, streamId)); - streams[streamId] = std::make_shared(t.m_transfromer->Transform(*streams[streamId])); - m_chainOfStreamDescriptions.push_back(streams); + transformedStreams[streamId] = std::make_shared(t.m_transfromer->Transform(*transformedStreams[streamId])); } + m_outputStreams = transformedStreams; } // Sets configuration for the current epoch. + // Some transformers can change their config based on the epoch. 
virtual void StartEpoch(const EpochConfiguration &config) override { assert(m_next != nullptr); @@ -45,21 +48,21 @@ public: t.first.m_transfromer->StartEpoch(config); } - m_randomizer->StartEpoch(config); + m_sequenceProvider->StartEpoch(config); } // Description of streams that the transformer provides. virtual std::vector GetStreamDescriptions() const override { - return m_chainOfStreamDescriptions.back(); + return m_outputStreams; } - // Gets next sequences up to a maximum count of samples. - // Sequences contains data for all streams. + // Gets next sequences up to a maximum count of samples, + // applying transformers to particular streams. virtual Sequences GetNextSequences(size_t sampleCount) override { assert(m_next != nullptr); - Sequences sequences = m_randomizer->GetNextSequences(sampleCount); + Sequences sequences = m_sequenceProvider->GetNextSequences(sampleCount); if (sequences.m_data.empty()) { return sequences; @@ -92,9 +95,9 @@ private: LogicError("Unexpected stream specifed for transformation."); } - SequenceEnumeratorPtr m_randomizer; + SequenceEnumeratorPtr m_sequenceProvider; + std::vector m_outputStreams; std::vector> m_transformations; - std::vector> m_chainOfStreamDescriptions; }; }}} From 831f7f2d04928f93fd11c3ad115bac855267ad60 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Wed, 11 May 2016 11:47:38 +0200 Subject: [PATCH 39/51] Addressing CR comments --- .../CNTKTextFormatReader.cpp | 3 ++ .../CompositeDataReader.cpp | 49 ++++++++++++------- .../CompositeDataReader/CompositeDataReader.h | 7 ++- .../CompositeDataReader.vcxproj.filters | 2 +- .../Readers/ImageReader/ImageConfigHelper.cpp | 4 +- .../ImageReader/ImageDataDeserializer.cpp | 9 +++- .../ImageReader/ImageDataDeserializer.h | 3 +- Source/Readers/ImageReader/ImageReader.cpp | 5 +- .../Readers/ImageReader/ImageTransformers.cpp | 13 ++--- .../Readers/ImageReader/ImageTransformers.h | 2 +- Source/Readers/ReaderLib/ConfigUtil.h | 4 +- Source/Readers/ReaderLib/SequenceEnumerator.h | 23 ++++----- .../Readers/ReaderLib/TransformController.h | 14 +++--- Source/Readers/ReaderLib/Transformer.h | 8 +-- .../EndToEndTests/Image/AlexNet/AlexNet.cntk | 28 +++++------ 15 files changed, 99 insertions(+), 75 deletions(-) diff --git a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp index 2486ba56d..e76fd33e0 100644 --- a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp +++ b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp @@ -14,6 +14,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { +// TODO: This class should go away eventually. +// TODO: The composition of packer + randomizer + different deserializers in a generic manner is done in the CompositeDataReader. +// TODO: Currently preserving this for backward compatibility with current configs. CNTKTextFormatReader::CNTKTextFormatReader(MemoryProviderPtr provider, const ConfigParameters& config) : m_provider(provider) diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp index 60620fac3..771913a96 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp @@ -2,7 +2,7 @@ // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
// -// CompositeDataReader.cpp : Defines a reader that allows composing different deserializers. +// CompositeReader.cpp : Defines a reader that allows composing different deserializers. // With this reader in place the users should only extend deserializers. // @@ -11,8 +11,6 @@ #define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings #endif -#define DATAREADER_EXPORTS // creating the exports here - #include "CompositeDataReader.h" #include "Bundler.h" #include "BlockRandomizer.h" @@ -22,20 +20,17 @@ #include "TruncatedBpttPacker.h" #include "CorpusDescriptor.h" #include "ConfigUtil.h" -#include namespace Microsoft { namespace MSR { namespace CNTK { +// The whole CompositeDataReader is meant as a stopgap to allow deserializers/transformers composition until SGD talkes +// directly to the new Reader API. +// For more information please see its header file. +// This method composes together packers + randomizer + a set of transformers and deserializers. CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryProviderPtr provider) : m_layout(make_shared()), m_corpus(std::make_shared()), m_provider(provider) { - int threadCount = config(L"numCPUThreads", 0); - if (threadCount > 0) - { - omp_set_num_threads(threadCount); - } - // Identifying packing mode. bool frameMode = config(L"frameMode", true); bool truncated = config(L"truncated", false); @@ -70,7 +65,7 @@ CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryP if (m_deserializers.empty()) { - InvalidArgument("Could not fine deserializers in the reader config."); + InvalidArgument("Could not find deserializers in the reader config."); } IDataDeserializerPtr deserializer = m_deserializers.front(); @@ -86,7 +81,9 @@ CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryP // Pick up the randomizer. bool randomize = config(L"randomize", false); - // By default do not use omp threads for deserialization. + // By default do not use omp threads for deserialization of sequences. + // It makes sense to put it to true for cases when deserialization is CPU intensive, + // i.e. decompression of images. bool multiThreadedDeserialization = config(L"multiThreadedDeserialization", false); if (randomize) { @@ -128,7 +125,10 @@ Minibatch CompositeDataReader::ReadMinibatch() return m_packer->ReadMinibatch(); } -// Create deserializers based on the specified configuration. +// Create deserializers based on the specified configuration. +// deserializers = [ +// [ type = "ImageDataDeserializer" module = "ImageReader" ...] +// [ type = "CNTKTextFormatDeserializer" module = "CNTKTextFormatReader" ...] void CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConfig) { argvector deserializerConfigs = @@ -149,7 +149,8 @@ void CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConf } } -// Creates a particular deserializer based on the config. +// Creates a particular deserializer based on the config: its loads the external module and calls CreateDeserializer +// factory function for a particular deserializer type. 
IDataDeserializerPtr CompositeDataReader::CreateDeserializer(const ConfigParameters& deserializerConfig, bool primary) { typedef bool(*CreateDeserializerFactory) (IDataDeserializer** d, const std::wstring& type, const ConfigParameters& cfg, CorpusDescriptorPtr corpus, bool primary); @@ -171,13 +172,23 @@ IDataDeserializerPtr CompositeDataReader::CreateDeserializer(const ConfigParamet return IDataDeserializerPtr(d); } -// Create transformers based on the configuration. +// Create transformers based on the configuration, i.e. +// deserializers = [ +// [ +// type = "ImageDataDeserializer" +// module = "ImageReader" +// inputs = [ +// features = [ +//----> transforms = [ +// [type = "Crop"]:[type = "Scale"]... + void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerConfig) { std::string defaultModule = deserializerConfig("module"); argvector inputs = deserializerConfig("inputs"); for (size_t i = 0; i < inputs.size(); ++i) { + // Trying to find transfomers in a stream section of the config. auto inputSections = TryGetSectionsWithParameter(inputs[i], "transforms"); if (inputSections.size() > 1) { @@ -193,7 +204,7 @@ void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerC ConfigParameters input = inputs[i](inputSections.front()); std::wstring inputName = msra::strfun::utf16(input.ConfigName()); - // Read tranformers in order. + // Read tranformers in order and appending them to the transformer pipeline. argvector transforms = input("transforms"); for (size_t j = 0; j < transforms.size(); ++j) { @@ -204,7 +215,9 @@ void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerC } -// Create a transformer for a particular configuration. Loading it from the default module if module is not specified. +// Create a transformer for a particular configuration. Loading it from the module of the deserializer if module is not specified, i.e. +// transforms = [ +// [type = "Scale" width=...]:... TransformerPtr CompositeDataReader::CreateTransformer(const ConfigParameters& config, const string& defaultModule) { typedef bool(*TransformerFactory) (Transformer** t, const std::wstring& type, const ConfigParameters& cfg); @@ -216,7 +229,7 @@ TransformerPtr CompositeDataReader::CreateTransformer(const ConfigParameters& co Transformer* t; if (!f(&t, transformerType, config)) { - RuntimeError("Cannot create transformer. Please check module and type in the configuration."); + RuntimeError("Cannot create transformer. Please check the module and type in the configuration."); } assert(t != nullptr); diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.h b/Source/Readers/CompositeDataReader/CompositeDataReader.h index 3398de8e0..96a588797 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.h +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.h @@ -36,22 +36,21 @@ typedef std::shared_ptr StreamDescriptionPtr; struct EpochConfiguration; struct Minibatch; -// The whole CompositeDataReader is meant as a stopgap to allow deserializers/transformers composition until SGD talkes +// The whole CompositeReader is meant as a stopgap to allow deserializers/transformers composition until SGD talkes // directly to the new Reader API. The example of the cntk configuration that this reader supports can be found at // Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/LSTM/FullUtterance/cntk.cntk -// CompositeDataReader is a factory for the new readers. 
Its main responsibility is to read the configuration and create the +// CompositeReader is a factory for the new readers. Its main responsibility is to read the configuration and create the // corresponding set of deserializers, the corpus descriptor, transformers, randomizer and packer, providing the following functionality: // - all input sequences are defined by the corpus descriptor // - deserializers provide sequences according to the corpus descriptor // - sequences can be transformed by the transformers applied on top of deserializer (TODO: not yet in place) // - deserializers are bound together using the bundler - it bundles sequences with the same sequence id retrieved from different deserializers // - packer is used to pack randomized sequences into the minibatch -// The composite data reader is currently also responsible for asynchronous prefetching of the minibatch data. +// The composite reader is currently also responsible for asynchronous prefetching of the minibatch data. // In order not to break existing configs and allow deserializers composition it exposes the same interface as the old readers, but it is not exposed // to external developers. The actual "reader developer" now has to provide deserializer(s) only. // TODO: Implement proper corpus descriptor. -// TODO: Same code as in ReaderLib shim, the one in the ReaderLib will be deleted as the next step. // TODO: Change this interface when SGD is changed. class CompositeDataReader : public Reader, protected Plugin { diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj.filters b/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj.filters index 5876724e8..9781986ef 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj.filters +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj.filters @@ -3,8 +3,8 @@ - + diff --git a/Source/Readers/ImageReader/ImageConfigHelper.cpp b/Source/Readers/ImageReader/ImageConfigHelper.cpp index b5af5b489..44d4b5506 100644 --- a/Source/Readers/ImageReader/ImageConfigHelper.cpp +++ b/Source/Readers/ImageReader/ImageConfigHelper.cpp @@ -13,8 +13,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config) : m_dataFormat(CHW) { - std::vector featureNames = GetSectionsWithParameter(config, "width"); - std::vector labelNames = GetSectionsWithParameter(config, "labelDim"); + std::vector featureNames = GetSectionsWithParameter("ImageReader", config, "width"); + std::vector labelNames = GetSectionsWithParameter("ImageReader", config, "labelDim"); // REVIEW alexeyk: currently support only one feature and label section. if (featureNames.size() != 1 || labelNames.size() != 1) diff --git a/Source/Readers/ImageReader/ImageDataDeserializer.cpp b/Source/Readers/ImageReader/ImageDataDeserializer.cpp index 72047bf7b..15ebd38f0 100644 --- a/Source/Readers/ImageReader/ImageDataDeserializer.cpp +++ b/Source/Readers/ImageReader/ImageDataDeserializer.cpp @@ -117,12 +117,15 @@ public: } }; +// A new constructor to support new compositional configuration, +// that allows composition of deserializers and transforms on inputs. +// For a sample config please see AlexImage end-to-end test. // TODO: Provide only sequences specified in the corpus descriptor. 
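// For illustration only, a minimal sketch (not a verbatim excerpt) of the compositional config shape this
// constructor consumes; it mirrors the AlexNetComposite.cntk test config added later in this series, and the
// concrete values (file path, width/height/channels, labelDim) are just examples:
//   [
//       type = "ImageDataDeserializer"
//       module = "ImageReader"
//       file = "$ConfigDir$/train_map.txt"
//       inputs = [
//           features = [ transforms = [ [ type = "Crop" ] : [ type = "Scale" width=224 height=224 channels=3 ] :
//                                        [ type = "Mean" meanFile = "$ConfigDir$/ImageNet1K_mean.xml" ] : [ type = "Transpose" ] ] ]
//           labels = [ labelDim = 1000 ]
//       ]
//   ]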
ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr, const ConfigParameters& config) { ConfigParameters inputs = config("inputs"); - std::vector featureNames = GetSectionsWithParameter(inputs, "transforms"); - std::vector labelNames = GetSectionsWithParameter(inputs, "labelDim"); + std::vector featureNames = GetSectionsWithParameter("ImageDataDeserializer", inputs, "transforms"); + std::vector labelNames = GetSectionsWithParameter("ImageDataDeserializer", inputs, "labelDim"); // TODO: currently support only one feature and label section. if (featureNames.size() != 1 || labelNames.size() != 1) @@ -167,6 +170,8 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr, const ConfigPa CreateSequenceDescriptions(config(L"file"), labelDimension, multiViewCrop); } +// TODO: Should be removed at some point. +// Supports old type of ImageReader configuration. ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config) { ImageConfigHelper configHelper(config); diff --git a/Source/Readers/ImageReader/ImageDataDeserializer.h b/Source/Readers/ImageReader/ImageDataDeserializer.h index 3078a80ce..1a9e97e32 100644 --- a/Source/Readers/ImageReader/ImageDataDeserializer.h +++ b/Source/Readers/ImageReader/ImageDataDeserializer.h @@ -21,7 +21,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { class ImageDataDeserializer : public DataDeserializerBase { public: - // Constructor for compositional configuration. + // A new constructor to support new compositional configuration, + // that allows composition of deserializers and transforms on inputs. ImageDataDeserializer(CorpusDescriptorPtr corpus, const ConfigParameters& config); // TODO: This constructor should be deprecated in the future. Compositional config should be used instead. diff --git a/Source/Readers/ImageReader/ImageReader.cpp b/Source/Readers/ImageReader/ImageReader.cpp index c99c0af08..7968f8fe9 100644 --- a/Source/Readers/ImageReader/ImageReader.cpp +++ b/Source/Readers/ImageReader/ImageReader.cpp @@ -17,6 +17,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { +// TODO: This class should go away eventually. +// TODO: The composition of packer + randomizer + different deserializers in a generic manner is done in the CompositeDataReader. +// TODO: Currently preserving this for backward compatibility with current configs. ImageReader::ImageReader(MemoryProviderPtr provider, const ConfigParameters& config) : m_seed(0), m_provider(provider) @@ -52,10 +55,10 @@ ImageReader::ImageReader(MemoryProviderPtr provider, randomizer = std::make_shared(deserializer, multithreadedGetNextSequences); } + // Create transformations for a single feature stream. std::wstring featureName = m_streams[configHelper.GetFeatureStreamId()]->m_name; ConfigParameters featureStream = config(featureName); - // Create transformations. std::vector transformations; transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); transformations.push_back(Transformation{ std::make_shared(featureStream), featureName }); diff --git a/Source/Readers/ImageReader/ImageTransformers.cpp b/Source/Readers/ImageReader/ImageTransformers.cpp index 82ff38c2f..56f9e0519 100644 --- a/Source/Readers/ImageReader/ImageTransformers.cpp +++ b/Source/Readers/ImageReader/ImageTransformers.cpp @@ -29,6 +29,8 @@ ImageTransformerBase::ImageTransformerBase(const ConfigParameters& readerConfig) m_seed = readerConfig(L"seed", 0u); } +// The method describes how input stream is transformed to the output stream. 
Called once per applied stream. +// Currently for image transformations we only support dense streams of type double or float. StreamDescription ImageTransformerBase::Transform(const StreamDescription& inputStream) { m_inputStream = inputStream; @@ -55,6 +57,7 @@ StreamDescription ImageTransformerBase::Transform(const StreamDescription& input return m_outputStream; } +// Transforms a single sequence as open cv dense image. Called once per sequence. SequenceDataPtr ImageTransformerBase::Transform(SequenceDataPtr sequence) { auto inputSequence = static_cast(*sequence); @@ -87,7 +90,6 @@ SequenceDataPtr ImageTransformerBase::Transform(SequenceDataPtr sequence) } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransformerBase(config) { floatargvector cropRatio = config(L"cropRatio", "1.0"); @@ -332,6 +334,8 @@ ScaleTransformer::ScaleTransformer(const ConfigParameters& config) : ImageTransf m_interp.push_back(cv::INTER_LINEAR); } +// The method describes how input stream is transformed to the output stream. Called once per applied stream. +// Scale transformer transforms the stream so that all samples are of the same size. StreamDescription ScaleTransformer::Transform(const StreamDescription& inputStream) { ImageTransformerBase::Transform(inputStream); @@ -339,7 +343,6 @@ StreamDescription ScaleTransformer::Transform(const StreamDescription& inputStre return m_outputStream; } - void ScaleTransformer::Apply(size_t id, cv::Mat &mat) { UNUSED(id); @@ -405,10 +408,8 @@ void MeanTransformer::Apply(size_t id, cv::Mat &mat) } } -TransposeTransformer::TransposeTransformer(const ConfigParameters&) -{ -} - +// The method describes how input stream is transformed to the output stream. Called once per applied stream. +// Transpose transformer expects the dense input stream with samples as HWC and outputs CHW. StreamDescription TransposeTransformer::Transform(const StreamDescription& inputStream) { m_inputStream = inputStream; diff --git a/Source/Readers/ImageReader/ImageTransformers.h b/Source/Readers/ImageReader/ImageTransformers.h index 55065df17..9179c8ea3 100644 --- a/Source/Readers/ImageReader/ImageTransformers.h +++ b/Source/Readers/ImageReader/ImageTransformers.h @@ -130,7 +130,7 @@ private: class TransposeTransformer : public Transformer { public: - explicit TransposeTransformer(const ConfigParameters& config); + explicit TransposeTransformer(const ConfigParameters&) {} void StartEpoch(const EpochConfiguration&) override {} diff --git a/Source/Readers/ReaderLib/ConfigUtil.h b/Source/Readers/ReaderLib/ConfigUtil.h index 7f2ed609a..297adcec7 100644 --- a/Source/Readers/ReaderLib/ConfigUtil.h +++ b/Source/Readers/ReaderLib/ConfigUtil.h @@ -27,12 +27,12 @@ inline std::vector TryGetSectionsWithParameter(const ConfigParamete } // Helper function to get sections that contains specified parameter. Throws if the parameter does not exist. 
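// For illustration, a minimal usage sketch of the signature change below: callers now pass their own name so
// the error message no longer hard-codes "ImageReader". For example (as used in ImageDataDeserializer.cpp):
//   auto featureNames = GetSectionsWithParameter("ImageDataDeserializer", inputs, "transforms");
//   // If no section under 'inputs' defines "transforms", this throws roughly:
//   //   "ImageDataDeserializer requires transforms parameter."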
-inline std::vector GetSectionsWithParameter(const ConfigParameters& config, const std::string& parameterName) +inline std::vector GetSectionsWithParameter(const std::string& reader, const ConfigParameters& config, const std::string& parameterName) { auto result = TryGetSectionsWithParameter(config, parameterName); if (result.empty()) { - RuntimeError("ImageReader requires %s parameter.", parameterName.c_str()); + RuntimeError("%s requires %s parameter.", reader.c_str(), parameterName.c_str()); } return result; } diff --git a/Source/Readers/ReaderLib/SequenceEnumerator.h b/Source/Readers/ReaderLib/SequenceEnumerator.h index 74fc88a0d..cad842d8e 100644 --- a/Source/Readers/ReaderLib/SequenceEnumerator.h +++ b/Source/Readers/ReaderLib/SequenceEnumerator.h @@ -12,39 +12,36 @@ namespace Microsoft { namespace MSR { namespace CNTK { class ConfigParameters; -// Defines a set of sequences. +// Defines a set of sequences for a set of streams. +// Return by the sequence enumerator. struct Sequences { - Sequences() - : m_endOfEpoch(false) - { - } - // Data for up to a requested number of sequences. - // Indices in the inner vector have to correspond to the stream IDs - // given by GetStream(). + // Indices in the outer vector have to correspond to the stream ids returned from the GetStreamDescriptions(). std::vector> m_data; // Indicates whether the epoch ends with the data returned. - bool m_endOfEpoch; + bool m_endOfEpoch = false; }; class SequenceEnumerator; typedef std::shared_ptr SequenceEnumeratorPtr; -// Sequence enumerator is used by the packer to get a set of new sequences. -// This interface is internal to CNTK and not exposed to the developers of deserializers/plugins. +// Sequence enumerator is internal interface used by the packer to get a set of new sequences. +// It is implemented either by different randomizers or by TransformController that can wrap the randomizer +// and apply different transforms on top of data. + +// This interface is not exposed to the developers of deserializers/plugins, internal to CNTK. class SequenceEnumerator { public: - // Describes streams the transformer produces. + // Describes streams the sequence enumerator produces. virtual std::vector GetStreamDescriptions() const = 0; // Sets current epoch configuration. virtual void StartEpoch(const EpochConfiguration& config) = 0; // Gets next sequences up to a maximum count of samples. - // The return value can be used until the next call to GetNextSequences. virtual Sequences GetNextSequences(size_t sampleCount) = 0; virtual ~SequenceEnumerator() diff --git a/Source/Readers/ReaderLib/TransformController.h b/Source/Readers/ReaderLib/TransformController.h index dc603e473..c6e2f2eca 100644 --- a/Source/Readers/ReaderLib/TransformController.h +++ b/Source/Readers/ReaderLib/TransformController.h @@ -15,11 +15,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { // A pair of a transformer and the stream name to which the transformer should be a applied. struct Transformation { - TransformerPtr m_transfromer; + TransformerPtr m_transformer; std::wstring m_streamName; }; // A class responsible for applying a list of transformers to sequences and stream descriptions. +// Delegates retrieving of sequences to another sequence provider(such as randomizer) and applies transformations after retrieving. +// Usually used by the packer to get next set of sequences. 
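// For illustration, a minimal composition sketch assumed from the pattern in ImageReader.cpp (not a verbatim
// excerpt): a reader builds the transformation list and wraps its randomizer so the packer sees transformed streams.
//   std::vector<Transformation> transformations;
//   transformations.push_back(Transformation{ std::make_shared<CropTransformer>(featureStream), featureName });
//   transformations.push_back(Transformation{ std::make_shared<ScaleTransformer>(featureStream), featureName });
//   SequenceEnumeratorPtr enumerator = std::make_shared<TransformController>(transformations, randomizer);
//   // The packer then calls enumerator->GetNextSequences(...); sequences of the "features" stream come back
//   // already passed through the Crop -> Scale chain.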
class TransformController : public SequenceEnumerator { public: @@ -27,13 +29,13 @@ public: : m_sequenceProvider(sequenceProvider) { // Applying transformations to stream descriptions, - // i.e. a transofrmation can change a stream from dense to sparse. + // i.e. a transformation can change a stream from dense to sparse. std::vector transformedStreams = m_sequenceProvider->GetStreamDescriptions(); for (auto& t : transformations) { size_t streamId = GetStreamId(t.m_streamName, transformedStreams); m_transformations.push_back(std::make_pair(t, streamId)); - transformedStreams[streamId] = std::make_shared(t.m_transfromer->Transform(*transformedStreams[streamId])); + transformedStreams[streamId] = std::make_shared(t.m_transformer->Transform(*transformedStreams[streamId])); } m_outputStreams = transformedStreams; } @@ -45,7 +47,7 @@ public: assert(m_next != nullptr); for (auto& t : m_transformations) { - t.first.m_transfromer->StartEpoch(config); + t.first.m_transformer->StartEpoch(config); } m_sequenceProvider->StartEpoch(config); @@ -73,7 +75,7 @@ public: { for (auto& t : m_transformations) { - sequences.m_data[t.second][j] = t.first.m_transfromer->Transform(sequences.m_data[t.second][j]); + sequences.m_data[t.second][j] = t.first.m_transformer->Transform(sequences.m_data[t.second][j]); } } @@ -92,7 +94,7 @@ private: } assert(false); - LogicError("Unexpected stream specifed for transformation."); + LogicError("Unexpected stream specified for transformation."); } SequenceEnumeratorPtr m_sequenceProvider; diff --git a/Source/Readers/ReaderLib/Transformer.h b/Source/Readers/ReaderLib/Transformer.h index a48783a8b..f67013595 100644 --- a/Source/Readers/ReaderLib/Transformer.h +++ b/Source/Readers/ReaderLib/Transformer.h @@ -5,7 +5,6 @@ #pragma once -#include #include "DataDeserializer.h" namespace Microsoft { namespace MSR { namespace CNTK { @@ -23,11 +22,12 @@ public: // based on the epoch. virtual void StartEpoch(const EpochConfiguration &config) = 0; - // Transforms input stream into output stream. + // Transformers are applied on a particular input stream - this method should describe + // how inputStream is transformed to the output stream (return value) virtual StreamDescription Transform(const StreamDescription& inputStream) = 0; - // Transforms input sequences into output sequence. - virtual SequenceDataPtr Transform(SequenceDataPtr sequence) = 0; + // This method should describe how input sequences is transformed to the output sequence. + virtual SequenceDataPtr Transform(SequenceDataPtr inputSequence) = 0; virtual ~Transformer() { diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk index 6436bed34..153a05e05 100644 --- a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk @@ -43,19 +43,19 @@ Train=[ ] reader = [ - verbosity = 0 - randomize = true + verbosity=0 + randomize=true # Currently for image reader a single sequence is a chunk # so setting randomization window to 1. - randomizationWindow =1 - useLegacy = false + randomizationWindow=1 + useLegacy=false # A list of deserializers to use. 
- deserializers = [ + deserializers=[ [ - type = "ImageDataDeserializer" - module = "ImageReader" + type="ImageDataDeserializer" + module="ImageReader" # Map file which maps images to labels using the following format: # @@ -64,7 +64,7 @@ Train=[ file=$ConfigDir$/train_map.txt # Description of input streams - inputs = [ + inputs=[ features=[ transforms=[ [ @@ -120,18 +120,18 @@ Test=[ ] reader=[ - verbosity = 0 - randomize = false + verbosity=0 + randomize=false # A list of deserializers to use. - deserializers = [ + deserializers=[ [ - type = "ImageDataDeserializer" - module = "ImageReader" + type="ImageDataDeserializer" + module="ImageReader" file=$ConfigDir$/val_map.txt # Description of input streams - inputs = [ + inputs=[ features=[ transforms=[ [ From c5f6709c8f5ee1504c02e5ad183bb6793e9e7f4c Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Wed, 11 May 2016 11:52:08 +0200 Subject: [PATCH 40/51] Correcting some comments --- Source/Readers/CompositeDataReader/CompositeDataReader.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Readers/CompositeDataReader/CompositeDataReader.h b/Source/Readers/CompositeDataReader/CompositeDataReader.h index 96a588797..811a066e5 100644 --- a/Source/Readers/CompositeDataReader/CompositeDataReader.h +++ b/Source/Readers/CompositeDataReader/CompositeDataReader.h @@ -36,10 +36,10 @@ typedef std::shared_ptr StreamDescriptionPtr; struct EpochConfiguration; struct Minibatch; -// The whole CompositeReader is meant as a stopgap to allow deserializers/transformers composition until SGD talkes +// The whole CompositeDataReader is meant as a stopgap to allow deserializers/transformers composition until SGD talkes // directly to the new Reader API. The example of the cntk configuration that this reader supports can be found at // Tests/EndToEndTests/Speech/ExperimentalHtkmlfReader/LSTM/FullUtterance/cntk.cntk -// CompositeReader is a factory for the new readers. Its main responsibility is to read the configuration and create the +// CompositeDataReader is a factory for the new readers. Its main responsibility is to read the configuration and create the // corresponding set of deserializers, the corpus descriptor, transformers, randomizer and packer, providing the following functionality: // - all input sequences are defined by the corpus descriptor // - deserializers provide sequences according to the corpus descriptor From f9ac57edac63e5818deb47f664424cd57453d43a Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Wed, 11 May 2016 14:47:47 +0200 Subject: [PATCH 41/51] Fixing debug build --- Source/Readers/ReaderLib/TransformController.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Readers/ReaderLib/TransformController.h b/Source/Readers/ReaderLib/TransformController.h index c6e2f2eca..5168b316a 100644 --- a/Source/Readers/ReaderLib/TransformController.h +++ b/Source/Readers/ReaderLib/TransformController.h @@ -44,7 +44,7 @@ public: // Some transformers can change their config based on the epoch. virtual void StartEpoch(const EpochConfiguration &config) override { - assert(m_next != nullptr); + assert(m_sequenceProvider != nullptr); for (auto& t : m_transformations) { t.first.m_transformer->StartEpoch(config); @@ -63,7 +63,7 @@ public: // applying transformers to particular streams. 
virtual Sequences GetNextSequences(size_t sampleCount) override { - assert(m_next != nullptr); + assert(m_sequenceProvider != nullptr); Sequences sequences = m_sequenceProvider->GetNextSequences(sampleCount); if (sequences.m_data.empty()) { From ff3837cce257b9a8b53956e784fc0bcbfd489bc7 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Thu, 12 May 2016 17:20:21 +0200 Subject: [PATCH 42/51] Fixing rebase issues --- .../Readers/ImageReader/ImageTransformers.cpp | 21 ------------------- .../Readers/ImageReader/ImageTransformers.h | 1 - 2 files changed, 22 deletions(-) diff --git a/Source/Readers/ImageReader/ImageTransformers.cpp b/Source/Readers/ImageReader/ImageTransformers.cpp index 56f9e0519..921d1cd4c 100644 --- a/Source/Readers/ImageReader/ImageTransformers.cpp +++ b/Source/Readers/ImageReader/ImageTransformers.cpp @@ -164,27 +164,6 @@ void CropTransformer::Apply(size_t id, cv::Mat &mat) m_rngs.push(std::move(rng)); } -CropTransformer::CropType -CropTransformer::ParseCropType(const std::string &src) -{ - if (src.empty() || AreEqualIgnoreCase(src, "center")) - { - return CropType::Center; - } - - if (AreEqualIgnoreCase(src, "random")) - { - return CropType::Random; - } - - if (AreEqualIgnoreCase(src, "multiview10")) - { - return CropType::MultiView10; - } - - RuntimeError("Invalid crop type: %s.", src.c_str()); -} - CropTransformer::RatioJitterType CropTransformer::ParseJitterType(const std::string &src) { diff --git a/Source/Readers/ImageReader/ImageTransformers.h b/Source/Readers/ImageReader/ImageTransformers.h index 9179c8ea3..9033264be 100644 --- a/Source/Readers/ImageReader/ImageTransformers.h +++ b/Source/Readers/ImageReader/ImageTransformers.h @@ -78,7 +78,6 @@ private: void StartEpoch(const EpochConfiguration &config) override; - CropType ParseCropType(const std::string &src); RatioJitterType ParseJitterType(const std::string &src); cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, std::mt19937 &rng); From 27b7c7230d15ebf46dfd0f44809f4208da9013b0 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Thu, 12 May 2016 17:45:00 +0200 Subject: [PATCH 43/51] Fixing crop transformation after rebase --- Source/Readers/ImageReader/ImageConfigHelper.h | 4 ++-- Source/Readers/ImageReader/ImageTransformers.cpp | 9 +++++---- Source/Readers/ImageReader/ImageTransformers.h | 2 -- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Source/Readers/ImageReader/ImageConfigHelper.h b/Source/Readers/ImageReader/ImageConfigHelper.h index 689ce8349..db37fc07d 100644 --- a/Source/Readers/ImageReader/ImageConfigHelper.h +++ b/Source/Readers/ImageReader/ImageConfigHelper.h @@ -68,12 +68,12 @@ public: return m_cropType == CropType::MultiView10; } + static CropType ParseCropType(const std::string &src); + private: ImageConfigHelper(const ImageConfigHelper&) = delete; ImageConfigHelper& operator=(const ImageConfigHelper&) = delete; - CropType ParseCropType(const std::string &src); - std::string m_mapPath; std::vector m_streams; ImageLayoutKind m_dataFormat; diff --git a/Source/Readers/ImageReader/ImageTransformers.cpp b/Source/Readers/ImageReader/ImageTransformers.cpp index 921d1cd4c..175681d70 100644 --- a/Source/Readers/ImageReader/ImageTransformers.cpp +++ b/Source/Readers/ImageReader/ImageTransformers.cpp @@ -25,7 +25,6 @@ struct ImageSequenceData : DenseSequenceData ImageTransformerBase::ImageTransformerBase(const ConfigParameters& readerConfig) : m_imageElementType(0) { - m_imageConfig = std::make_unique(readerConfig); m_seed = readerConfig(L"seed", 0u); 
} @@ -106,9 +105,11 @@ CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransfor m_jitterType = ParseJitterType(config(L"jitterType", "")); + m_cropType = ImageConfigHelper::ParseCropType(config(L"cropType", "")); + if (!config.ExistsCurrent(L"hflip")) { - m_hFlip = m_imageConfig->GetCropType() == CropType::Random; + m_hFlip = m_cropType == CropType::Random; } else { @@ -152,9 +153,9 @@ void CropTransformer::Apply(size_t id, cv::Mat &mat) RuntimeError("Jitter type currently not implemented."); } - int viewIndex = m_imageConfig->IsMultiViewCrop() ? (int)(id % 10) : 0; + int viewIndex = m_cropType == CropType::MultiView10 ? (int)(id % 10) : 0; - mat = mat(GetCropRect(m_imageConfig->GetCropType(), viewIndex, mat.rows, mat.cols, ratio, *rng)); + mat = mat(GetCropRect(m_cropType, viewIndex, mat.rows, mat.cols, ratio, *rng)); if ((m_hFlip && std::bernoulli_distribution()(*rng)) || viewIndex >= 5) { diff --git a/Source/Readers/ImageReader/ImageTransformers.h b/Source/Readers/ImageReader/ImageTransformers.h index 9033264be..060dfc8e4 100644 --- a/Source/Readers/ImageReader/ImageTransformers.h +++ b/Source/Readers/ImageReader/ImageTransformers.h @@ -48,8 +48,6 @@ protected: virtual void Apply(size_t id, cv::Mat &from) = 0; protected: - std::unique_ptr m_imageConfig; - StreamDescription m_inputStream; StreamDescription m_outputStream; unsigned int m_seed; From 79f610b71e788dc6e1efa041efbb35cb59439f6a Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Fri, 13 May 2016 12:25:11 +0200 Subject: [PATCH 44/51] Adding a deserializer test for AlexNet --- .../EndToEndTests/Image/AlexNet/AlexNet.cntk | 197 +- .../Image/AlexNet/AlexNetCommon.cntk | 63 + .../Image/AlexNet/AlexNetComposite.cntk | 101 + .../Composite/baseline.linux.debug.gpu.txt | 2536 ++++++++++++++ .../Composite/baseline.linux.release.gpu.txt | 853 +++++ .../Composite/baseline.windows.debug.gpu.txt | 2984 +++++++++++++++++ .../baseline.windows.release.gpu.txt | 851 +++++ .../Image/AlexNet/Composite/run-test | 47 + .../Image/AlexNet/Composite/testcases.yml | 31 + Tests/EndToEndTests/Image/AlexNet/run-test | 6 +- 10 files changed, 7517 insertions(+), 152 deletions(-) create mode 100644 Tests/EndToEndTests/Image/AlexNet/AlexNetCommon.cntk create mode 100644 Tests/EndToEndTests/Image/AlexNet/AlexNetComposite.cntk create mode 100644 Tests/EndToEndTests/Image/AlexNet/Composite/baseline.linux.debug.gpu.txt create mode 100644 Tests/EndToEndTests/Image/AlexNet/Composite/baseline.linux.release.gpu.txt create mode 100644 Tests/EndToEndTests/Image/AlexNet/Composite/baseline.windows.debug.gpu.txt create mode 100644 Tests/EndToEndTests/Image/AlexNet/Composite/baseline.windows.release.gpu.txt create mode 100644 Tests/EndToEndTests/Image/AlexNet/Composite/run-test create mode 100644 Tests/EndToEndTests/Image/AlexNet/Composite/testcases.yml diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk index 153a05e05..7c896320d 100644 --- a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk @@ -1,160 +1,55 @@ -ModelDir = "$RunDir$/models" - -ndlMacros=$ConfigDir$/Macros.ndl - -precision=float -deviceId=Auto - -command=Train:AddTop5Eval:Test - -parallelTrain=false - -traceLevel=1 -numMBsToShowResult=100 - Train=[ - action=train - modelPath=$ModelDir$/AlexNet - - NDLNetworkBuilder=[ - networkDescription=$ConfigDir$/AlexNet.ndl - ] - - SGD=[ - epochSize=0 - minibatchSize=16 - learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 - momentumPerMB=0.9 
- maxEpochs=3 - gradUpdateType=None - L2RegWeight=0.0005 - dropoutRate=0*5:0.5 - - ParallelTrain=[ - parallelizationMethod=DataParallelSGD - distributedMBReading=true - parallelizationStartEpoch=1 - DataParallelSGD=[ - gradientBits=1 - ] + reader=[ + readerType=ImageReader + numCPUThreads=1 + # Map file which maps images to labels using the following format: + # + # Example: + # C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG0 + file=$ConfigDir$/train_map.txt + # Randomize images before every epoch. Possible values: None, Auto. Default: Auto. + randomize=Auto + features=[ + # Below are the required parameters. + width=224 + height=224 + channels=3 + # Below are the optional parameters. + # Possible values: Center, Random. Default: Center + cropType=Random + # Horizontal random flip, will be enabled by default if cropType=Random + #hflip=0 + # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1. + cropRatio=0.875 + # Crop scale ratio jitter type. + # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio + jitterType=UniRatio + # Interpolation to use when scaling image to width x height size. + # Possible values: nearest, linear, cubic, lanczos. Default: linear. + interpolations=Linear + # Stores mean values for each pixel in OpenCV matrix XML format. + meanFile=$ConfigDir$/ImageNet1K_mean.xml ] - - numMBsToShowResult=100 - ] - - reader = [ - verbosity=0 - randomize=true - - # Currently for image reader a single sequence is a chunk - # so setting randomization window to 1. - randomizationWindow=1 - useLegacy=false - - # A list of deserializers to use. - deserializers=[ - [ - type="ImageDataDeserializer" - module="ImageReader" - - # Map file which maps images to labels using the following format: - # - # Example: - # C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG0 - file=$ConfigDir$/train_map.txt - - # Description of input streams - inputs=[ - features=[ - transforms=[ - [ - type="Crop" - # Possible values: Center, Random. Default: Center - cropType=Random - # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1. - cropRatio=0.875 - # Crop scale ratio jitter type. - # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio - jitterType=UniRatio - ]:[ - type="Scale" - width=224 - height=224 - channels=3 - # Interpolation to use when scaling image to width x height size. - # Possible values: nearest, linear, cubic, lanczos. Default: linear. - interpolations=Linear - ]:[ - type="Mean" - # Stores mean values for each pixel in OpenCV matrix XML format. - meanFile=$ConfigDir$/ImageNet1K_mean.xml - ]:[ - type="Transpose" - ] - ] - ] - labels=[ - labelDim=1000 - ] - ] - ] + labels=[ + labelDim=1000 ] ] ] -AddTop5Eval=[ - action=edit - CurModel=$ModelDir$/AlexNet - NewModel=$ModelDir$/AlexNet.Top5 - editPath=$ConfigDir$/add_top5_layer.mel -] - -Test=[ - action=test - modelPath=$ModelDir$/AlexNet.Top5 - # Set minibatch size for testing. - minibatchSize=16 - - NDLNetworkBuilder=[ - networkDescription=$ConfigDir$/AlexNet.ndl - ] - +Test=[ reader=[ - verbosity=0 - randomize=false - - # A list of deserializers to use. 
- deserializers=[ - [ - type="ImageDataDeserializer" - module="ImageReader" - file=$ConfigDir$/val_map.txt - - # Description of input streams - inputs=[ - features=[ - transforms=[ - [ - type="Crop" - cropType=Center - ]:[ - type="Scale" - width=224 - height=224 - channels=3 - ]:[ - type="Mean" - meanFile=$ConfigDir$/ImageNet1K_mean.xml - ]:[ - type="Transpose" - ] - ] - ] - labels=[ - labelDim=1000 - ] - ] - ] + readerType=ImageReader + file=$ConfigDir$/val_map.txt + randomize=None + features=[ + width=224 + height=224 + channels=3 + cropType=Center + meanFile=$ConfigDir$/ImageNet1K_mean.xml ] - ] + labels=[ + labelDim=1000 + ] + ] ] diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNetCommon.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNetCommon.cntk new file mode 100644 index 000000000..ced92faf7 --- /dev/null +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNetCommon.cntk @@ -0,0 +1,63 @@ +ModelDir = "$RunDir$/models" + +ndlMacros=$ConfigDir$/Macros.ndl + +precision=float +deviceId=Auto + +command=Train:AddTop5Eval:Test + +parallelTrain=false + +traceLevel=1 +numMBsToShowResult=100 + +Train=[ + action=train + modelPath=$ModelDir$/AlexNet + + NDLNetworkBuilder=[ + networkDescription=$ConfigDir$/AlexNet.ndl + ] + + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + + numMBsToShowResult=100 + ] +] + + +AddTop5Eval=[ + action=edit + CurModel=$ModelDir$/AlexNet + NewModel=$ModelDir$/AlexNet.Top5 + editPath=$ConfigDir$/add_top5_layer.mel +] + +Test=[ + action=test + modelPath=$ModelDir$/AlexNet.Top5 + # Set minibatch size for testing. + minibatchSize=16 + + NDLNetworkBuilder=[ + networkDescription=$ConfigDir$/AlexNet.ndl + ] +] \ No newline at end of file diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNetComposite.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNetComposite.cntk new file mode 100644 index 000000000..dd2f7b672 --- /dev/null +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNetComposite.cntk @@ -0,0 +1,101 @@ +Train=[ + reader = [ + verbosity=0 + randomize=true + + # Currently for image reader a single sequence is a chunk + # so setting randomization window to 1. + randomizationWindow=1 + useLegacy=false + + # A list of deserializers to use. + deserializers=[ + [ + type="ImageDataDeserializer" + module="ImageReader" + + # Map file which maps images to labels using the following format: + # + # Example: + # C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG0 + file=$ConfigDir$/train_map.txt + + # Description of input streams + inputs=[ + features=[ + transforms=[ + [ + type="Crop" + # Possible values: Center, Random. Default: Center + cropType=Random + # Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1. + cropRatio=0.875 + # Crop scale ratio jitter type. + # Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio + jitterType=UniRatio + ]:[ + type="Scale" + width=224 + height=224 + channels=3 + # Interpolation to use when scaling image to width x height size. + # Possible values: nearest, linear, cubic, lanczos. Default: linear. + interpolations=Linear + ]:[ + type="Mean" + # Stores mean values for each pixel in OpenCV matrix XML format. 
+ meanFile=$ConfigDir$/ImageNet1K_mean.xml + ]:[ + type="Transpose" + ] + ] + ] + labels=[ + labelDim=1000 + ] + ] + ] + ] + ] +] + +Test=[ + reader=[ + verbosity=0 + randomize=false + + # A list of deserializers to use. + deserializers=[ + [ + type="ImageDataDeserializer" + module="ImageReader" + file=$ConfigDir$/val_map.txt + + # Description of input streams + inputs=[ + features=[ + transforms=[ + [ + type="Crop" + cropType=Center + ]:[ + type="Scale" + width=224 + height=224 + channels=3 + ]:[ + type="Mean" + meanFile=$ConfigDir$/ImageNet1K_mean.xml + ]:[ + type="Transpose" + ] + ] + ] + labels=[ + labelDim=1000 + ] + ] + ] + ] + ] +] diff --git a/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.linux.debug.gpu.txt b/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.linux.debug.gpu.txt new file mode 100644 index 000000000..924abab33 --- /dev/null +++ b/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.linux.debug.gpu.txt @@ -0,0 +1,2536 @@ +Copying test data to local directory +=== Running /home/eldar/repo/cntk/CNTK/build/gpu/debug/bin/cntk configFile=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/AlexNet.config currentDirectory=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/TestData RunDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu DataDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/TestData ConfigDir=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet DeviceId=0 +------------------------------------------------------------------- +Build info: + + Built time: Jan 29 2016 11:24:31 + Last modified date: Fri Jan 29 11:02:27 2016 + Build type: release + Math lib: acml + CUDA_PATH: /usr/local/cuda-7.0 + CUB_PATH: /usr/local/cub-1.4.1 + CUDNN_PATH: /usr/local/cudnn-4.0 + Build Branch: master + Build SHA1: 3224f026263714bc0a281db6452cafb5ff991ab7 +------------------------------------------------------------------- +------------------------------------------------------------------- +Build info: + + Built time: Jan 29 2016 11:24:31 + Last modified date: Fri Jan 29 11:02:27 2016 + Build type: release + Math lib: acml + CUDA_PATH: /usr/local/cuda-7.0 + CUB_PATH: /usr/local/cub-1.4.1 + CUDNN_PATH: /usr/local/cudnn-4.0 + Build Branch: master + Build SHA1: 3224f026263714bc0a281db6452cafb5ff991ab7 +------------------------------------------------------------------- +running on localhost at 2016/01/29 13:42:24 +command line: +/home/eldar/repo/cntk/CNTK/build/gpu/debug/bin/cntk configFile=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/AlexNet.config currentDirectory=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/TestData RunDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu DataDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/TestData ConfigDir=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet DeviceId=0 + +>>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +ModelDir = "$RunDir$/models" +ndlMacros=$ConfigDir$/Macros.ndl +precision=float +deviceId=Auto +command=Train:AddTop5Eval:Test +parallelTrain=false +traceLevel=1 +numMBsToShowResult=100 +Train=[ + action=train + modelPath=$ModelDir$/AlexNet + NDLNetworkBuilder=[ + networkDescription=$ConfigDir$/AlexNet.ndl + ] + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + 
distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + numMBsToShowResult=100 + ] + reader=[ + readerType=ImageReader + file=$ConfigDir$/train_map.txt + randomize=Auto + numCPUThreads = 1 + features=[ + width=224 + height=224 + channels=3 + cropType=Random + cropRatio=0.875 + jitterType=UniRatio + interpolations=Linear + meanFile=$ConfigDir$/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +AddTop5Eval=[ + action=edit + CurModel=$ModelDir$/AlexNet + NewModel=$ModelDir$/AlexNet.Top5 + editPath=$ConfigDir$/add_top5_layer.mel +] +Test=[ + action=test + modelPath=$ModelDir$/AlexNet.Top5 + minibatchSize=16 + NDLNetworkBuilder=[ + networkDescription=$ConfigDir$/AlexNet.ndl + ] + reader=[ + readerType=ImageReader + numCPUThreads = 1 + file=$ConfigDir$/val_map.txt + randomize=None + features=[ + width=224 + height=224 + channels=3 + cropType=Center + meanFile=$ConfigDir$/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +currentDirectory=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/TestData +RunDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu +DataDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/TestData +ConfigDir=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet +DeviceId=0 + +<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< + +>>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +ModelDir = "/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models" +ndlMacros=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/Macros.ndl +precision=float +deviceId=Auto +command=Train:AddTop5Eval:Test +parallelTrain=false +traceLevel=1 +numMBsToShowResult=100 +Train=[ + action=train + modelPath=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models/AlexNet + NDLNetworkBuilder=[ + networkDescription=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/AlexNet.ndl + ] + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + numMBsToShowResult=100 + ] + reader=[ + readerType=ImageReader + file=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/train_map.txt + randomize=Auto + numCPUThreads = 1 + features=[ + width=224 + height=224 + channels=3 + cropType=Random + cropRatio=0.875 + jitterType=UniRatio + interpolations=Linear + meanFile=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +AddTop5Eval=[ + action=edit + CurModel=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models/AlexNet + NewModel=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models/AlexNet.Top5 + editPath=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/add_top5_layer.mel +] +Test=[ + action=test + modelPath=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models/AlexNet.Top5 + minibatchSize=16 + NDLNetworkBuilder=[ + networkDescription=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/AlexNet.ndl + ] + reader=[ + readerType=ImageReader + numCPUThreads = 1 + file=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/val_map.txt + randomize=None + features=[ + width=224 + 
height=224 + channels=3 + cropType=Center + meanFile=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +currentDirectory=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/TestData +RunDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu +DataDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/TestData +ConfigDir=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet +DeviceId=0 + +<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< + +>>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +configparameters: AlexNet.config:AddTop5Eval=[ + action=edit + CurModel=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models/AlexNet + NewModel=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models/AlexNet.Top5 + editPath=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/add_top5_layer.mel +] + +configparameters: AlexNet.config:command=Train:AddTop5Eval:Test +configparameters: AlexNet.config:ConfigDir=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet +configparameters: AlexNet.config:currentDirectory=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/TestData +configparameters: AlexNet.config:DataDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/TestData +configparameters: AlexNet.config:deviceId=0 +configparameters: AlexNet.config:ModelDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models +configparameters: AlexNet.config:ndlMacros=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/Macros.ndl +configparameters: AlexNet.config:numMBsToShowResult=100 +configparameters: AlexNet.config:parallelTrain=false +configparameters: AlexNet.config:precision=float +configparameters: AlexNet.config:RunDir=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu +configparameters: AlexNet.config:Test=[ + action=test + modelPath=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models/AlexNet.Top5 + minibatchSize=16 + NDLNetworkBuilder=[ + networkDescription=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/AlexNet.ndl + ] + reader=[ + readerType=ImageReader + numCPUThreads = 1 + file=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/val_map.txt + randomize=None + features=[ + width=224 + height=224 + channels=3 + cropType=Center + meanFile=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] + +configparameters: AlexNet.config:traceLevel=1 +configparameters: AlexNet.config:Train=[ + action=train + modelPath=/tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models/AlexNet + NDLNetworkBuilder=[ + networkDescription=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/AlexNet.ndl + ] + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + numMBsToShowResult=100 + ] + reader=[ + readerType=ImageReader + file=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/train_map.txt + randomize=Auto + numCPUThreads = 1 + features=[ + width=224 + height=224 + channels=3 + cropType=Random + cropRatio=0.875 + jitterType=UniRatio + 
interpolations=Linear + meanFile=/home/eldar/repo/cntk/CNTK/Tests/EndToEndTests/Image/AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] + +<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +command: Train AddTop5Eval Test +precision = float +CNTKModelPath: /tmp/cntk-test-20160129134223.237233/Image_AlexNet@debug_gpu/models/AlexNet +CNTKCommandTrainInfo: Train : 3 +CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +CNTKCommandTrainBegin: Train +NDLBuilder Using GPU 0 +SetGaussianRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 + +Post-processing network... + +3 roots: + OutputNodes.z = Plus + CE = CrossEntropyWithSoftmax + Err = ErrorPrediction +FormNestedNetwork: WARNING: Was called twice for OutputNodes.z Plus operation +FormNestedNetwork: WARNING: Was called twice for CE CrossEntropyWithSoftmax operation +FormNestedNetwork: WARNING: Was called twice for Err ErrorPrediction operation + + +Validating for node OutputNodes.z. 45 nodes to process in pass 1. + +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 
13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z. 28 nodes to process in pass 2. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z, final verification. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +16 out of 45 nodes do not share the minibatch layout with the input data. + + +Validating for node CE. 47 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +17 out of 47 nodes do not share the minibatch layout with the input data. + + +Validating for node Err. 47 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1]
+
+17 out of 47 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+SGD using GPU 0.
+
+Training criterion node(s):
+ CE = CrossEntropyWithSoftmax
+
+Evaluation criterion node(s):
+ Err = ErrorPrediction
+
+
+Allocating matrices for forward and/or backward propagation.
+No PreCompute nodes found, skipping PreCompute step
+Set Max Temp Mem Size For Convolution Nodes to 0 samples.
+Starting Epoch 1: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples
+
+Starting minibatch loop.
+ Epoch[ 1 of 3]-Minibatch[ 1- 100]: * 1600; ce = 7.41011780; err = 1.00000000; TotalTime = 22.4203s; SamplesPerSecond = 71.4
+Finished Epoch[ 1 of 3]: [Training] ce = 7.2227564; err = 0.99966657; learningRatePerSample = 0.00062499999; EpochTime=41.5204
+Starting Epoch 2: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples
+
+Starting minibatch loop.
+ Epoch[ 2 of 3]-Minibatch[ 1- 100, 100.00%]: * 1600; ce = 6.90544067; err = 0.99750000; TotalTime = 21.9407s; SamplesPerSecond = 72.9
+Finished Epoch[ 2 of 3]: [Training] ce = 6.9181705; err = 0.99799931; learningRatePerSample = 0.00062499999; EpochTime=41.0468
+Starting Epoch 3: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples
+
+Starting minibatch loop.
+ Epoch[ 3 of 3]-Minibatch[ 1- 100, 100.00%]: * 1600; ce = 6.87414368; err = 0.99812500; TotalTime = 21.8825s; SamplesPerSecond = 73.1
+Finished Epoch[ 3 of 3]: [Training] ce = 6.884582; err = 0.99799931; learningRatePerSample = 0.00062499999; EpochTime=40.9431
+CNTKCommandTrainEnd: Train
+WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command.
+WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command.
+WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command.
+WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command.
+WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command.
+WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command.
+WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command.
+WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command.
+
+Post-processing network...
+
+3 roots:
+ CE = CrossEntropyWithSoftmax
+ Err = ErrorPrediction
+ OutputNodes.z = Plus
+FormNestedNetwork: WARNING: Was called twice for CE CrossEntropyWithSoftmax operation
+FormNestedNetwork: WARNING: Was called twice for Err ErrorPrediction operation
+FormNestedNetwork: WARNING: Was called twice for OutputNodes.z Plus operation
+
+
+Validating for node CE. 47 nodes to process in pass 1.
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +17 out of 47 nodes do not share the minibatch layout with the input data. + + +Validating for node Err. 47 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +17 out of 47 nodes do not share the minibatch layout with the input data. + + +Validating for node OutputNodes.z. 45 nodes to process in pass 1. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z. 28 nodes to process in pass 2. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z, final verification. 
+
+Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096]
+Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096]
+Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216]
+Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304]
+Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456]
+Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728]
+Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600]
+Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363]
+Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0]
+Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0]
+Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1]
+Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0]
+Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0]
+Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0]
+Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0]
+Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1]
+Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0]
+Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0]
+Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0]
+Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0]
+Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1]
+Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0]
+Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0]
+Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+
+16 out of 45 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+Post-processing network...
+
+4 roots:
+ CE = CrossEntropyWithSoftmax
+ Err = ErrorPrediction
+ errTop5 = ErrorPrediction
+ OutputNodes.z = Plus
+FormNestedNetwork: WARNING: Was called twice for CE CrossEntropyWithSoftmax operation
+FormNestedNetwork: WARNING: Was called twice for Err ErrorPrediction operation
+FormNestedNetwork: WARNING: Was called twice for errTop5 ErrorPrediction operation
+FormNestedNetwork: WARNING: Was called twice for OutputNodes.z Plus operation
+
+
+Validating for node CE. 47 nodes to process in pass 1.
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +17 out of 47 nodes do not share the minibatch layout with the input data. + + +Validating for node Err. 47 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +17 out of 47 nodes do not share the minibatch layout with the input data. + + +Validating for node errTop5. 48 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +Validating for node errTop5. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +Validating for node errTop5, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +18 out of 48 nodes do not share the minibatch layout with the input data. + + +Validating for node OutputNodes.z. 45 nodes to process in pass 1. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z. 28 nodes to process in pass 2. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z, final verification. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +16 out of 45 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. + +Post-processing network... + +4 roots: + CE = CrossEntropyWithSoftmax + errTop5 = ErrorPrediction + Err = ErrorPrediction + OutputNodes.z = Plus +FormNestedNetwork: WARNING: Was called twice for CE CrossEntropyWithSoftmax operation +FormNestedNetwork: WARNING: Was called twice for errTop5 ErrorPrediction operation +FormNestedNetwork: WARNING: Was called twice for Err ErrorPrediction operation +FormNestedNetwork: WARNING: Was called twice for OutputNodes.z Plus operation + + +Validating for node CE. 47 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +17 out of 47 nodes do not share the minibatch layout with the input data. + + +Validating for node errTop5. 48 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +Validating for node errTop5. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +Validating for node errTop5, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +18 out of 48 nodes do not share the minibatch layout with the input data. + + +Validating for node Err. 47 nodes to process in pass 1. 
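The errTop5 node above is an ErrorPrediction with a third, constant input (unnamed125, a 1 x 1 LearnableParameter, presumably carrying the top-N value of 5), which accounts for its 48-node graph versus 47 for the plain Err node validated next. Both report a per-sample error rate. The sketch below illustrates the top-1 versus top-N distinction only; it uses NumPy with random scores and is not CNTK code.

    import numpy as np

    def error_rate(scores, labels, top_n=1):
        # scores: (num_samples, num_classes); labels: (num_samples,) integer class ids.
        # A sample counts as correct if its label is among the top_n highest scores.
        top = np.argsort(-scores, axis=1)[:, :top_n]
        correct = (top == labels[:, None]).any(axis=1)
        return 1.0 - correct.mean()

    rng = np.random.default_rng(0)
    scores = rng.standard_normal((500, 1000))   # untrained-looking scores over 1000 classes
    labels = rng.integers(0, 1000, size=500)
    print(error_rate(scores, labels, top_n=1))  # close to 0.999 for random scores
    print(error_rate(scores, labels, top_n=5))  # close to 0.995 for random scores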
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +17 out of 47 nodes do not share the minibatch layout with the input data. + + +Validating for node OutputNodes.z. 45 nodes to process in pass 1. 
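The fully connected stack in these dumps reduces pool3's 9216 values to 1000 class scores through two 4096-wide ReLU layers with dropout. Each Times node is a plain matrix product, so the logged shapes follow the usual [rows, cols] x [cols, minibatch] rule, with one column per sample. A shape-only NumPy sketch, with random weights and an illustrative minibatch size of 32 (not taken from the log); biases and dropout are omitted for brevity.

    import numpy as np

    rng = np.random.default_rng(0)
    mb = 32                                  # illustrative minibatch size
    x  = rng.standard_normal((9216, mb))     # pool3 output, one column per sample
    W1 = rng.standard_normal((4096, 9216))   # h1.W
    W2 = rng.standard_normal((4096, 4096))   # h2.W
    Wo = rng.standard_normal((1000, 4096))   # OutputNodes.W

    h1 = np.maximum(W1 @ x, 0)               # h1.t / h1.y: [4096, mb]
    h2 = np.maximum(W2 @ h1, 0)              # h2.t / h2.y: [4096, mb]
    z  = Wo @ h2                             # OutputNodes.z: [1000, mb]
    assert z.shape == (1000, mb)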
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z. 28 nodes to process in pass 2. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z, final verification. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +16 out of 45 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. +evalNodeNames are not specified, using all the default evalnodes and training criterion nodes. + + +Allocating matrices for forward and/or backward propagation. 
+Minibatch[1-32]: Samples Seen = 500 Err: ErrorPrediction/Sample = 0.998 errTop5: ErrorPrediction/Sample = 0.992 CE: CrossEntropyWithSoftmax/Sample = 6.9591762 +Final Results: Minibatch[1-32]: Samples Seen = 500 Err: ErrorPrediction/Sample = 0.998 errTop5: ErrorPrediction/Sample = 0.992 CE: CrossEntropyWithSoftmax/Sample = 6.9591762 perplexity = 1052.766 +__COMPLETED__ diff --git a/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.linux.release.gpu.txt b/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.linux.release.gpu.txt new file mode 100644 index 000000000..ecd1b86e7 --- /dev/null +++ b/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.linux.release.gpu.txt @@ -0,0 +1,853 @@ +Copying test data to local directory +=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk currentDirectory=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData RunDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu DataDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet OutputDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu DeviceId=0 timestamping=true +------------------------------------------------------------------- +Build info: + + Built time: May 3 2016 17:56:15 + Last modified date: Tue May 3 11:36:22 2016 + Build type: release + Build target: GPU + With 1bit-SGD: no + Math lib: acml + CUDA_PATH: /usr/local/cuda-7.5 + CUB_PATH: /usr/local/cub-1.4.1 + CUDNN_PATH: /usr/local/cudnn-4.0 + Build Branch: HEAD + Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 + Built by philly on 18750d26eb32 + Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +------------------------------------------------------------------- +Changed current directory to /tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData +05/03/2016 18:06:53: ------------------------------------------------------------------- +05/03/2016 18:06:53: Build info: + +05/03/2016 18:06:53: Built time: May 3 2016 17:56:15 +05/03/2016 18:06:53: Last modified date: Tue May 3 11:36:22 2016 +05/03/2016 18:06:53: Build type: release +05/03/2016 18:06:53: Build target: GPU +05/03/2016 18:06:53: With 1bit-SGD: no +05/03/2016 18:06:53: Math lib: acml +05/03/2016 18:06:53: CUDA_PATH: /usr/local/cuda-7.5 +05/03/2016 18:06:53: CUB_PATH: /usr/local/cub-1.4.1 +05/03/2016 18:06:53: CUDNN_PATH: /usr/local/cudnn-4.0 +05/03/2016 18:06:53: Build Branch: HEAD +05/03/2016 18:06:53: Build SHA1: 571b092d60e131fd529081a5ed52af2dc815dc82 +05/03/2016 18:06:53: Built by philly on 18750d26eb32 +05/03/2016 18:06:53: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux +05/03/2016 18:06:53: ------------------------------------------------------------------- + +05/03/2016 18:06:53: Running on localhost at 2016/05/03 18:06:53 +05/03/2016 18:06:53: Command line: +/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk currentDirectory=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData RunDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu DataDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData 
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet OutputDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu DeviceId=0 timestamping=true + + + +05/03/2016 18:06:53: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:06:53: ModelDir = "$RunDir$/models" +ndlMacros=$ConfigDir$/Macros.ndl +precision=float +deviceId=Auto +command=Train:AddTop5Eval:Test +parallelTrain=false +traceLevel=1 +numMBsToShowResult=100 +Train=[ + action=train + modelPath=$ModelDir$/AlexNet + NDLNetworkBuilder=[ + networkDescription=$ConfigDir$/AlexNet.ndl + ] + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + numMBsToShowResult=100 + ] + reader=[ + readerType=ImageReader + file=$ConfigDir$/train_map.txt + randomize=Auto + features=[ + width=224 + height=224 + channels=3 + cropType=Random + cropRatio=0.875 + jitterType=UniRatio + interpolations=Linear + meanFile=$ConfigDir$/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +AddTop5Eval=[ + action=edit + CurModel=$ModelDir$/AlexNet + NewModel=$ModelDir$/AlexNet.Top5 + editPath=$ConfigDir$/add_top5_layer.mel +] +Test=[ + action=test + modelPath=$ModelDir$/AlexNet.Top5 + minibatchSize=16 + NDLNetworkBuilder=[ + networkDescription=$ConfigDir$/AlexNet.ndl + ] + reader=[ + readerType=ImageReader + file=$ConfigDir$/val_map.txt + randomize=None + features=[ + width=224 + height=224 + channels=3 + cropType=Center + meanFile=$ConfigDir$/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +currentDirectory=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData +RunDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu +DataDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet +OutputDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu +DeviceId=0 +timestamping=true + +05/03/2016 18:06:53: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< + +05/03/2016 18:06:53: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +05/03/2016 18:06:53: ModelDir = "/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models" +ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/Macros.ndl +precision=float +deviceId=Auto +command=Train:AddTop5Eval:Test +parallelTrain=false +traceLevel=1 +numMBsToShowResult=100 +Train=[ + action=train + modelPath=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet + NDLNetworkBuilder=[ + networkDescription=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/AlexNet.ndl + ] + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + numMBsToShowResult=100 + ] + reader=[ + readerType=ImageReader + 
file=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/train_map.txt + randomize=Auto + features=[ + width=224 + height=224 + channels=3 + cropType=Random + cropRatio=0.875 + jitterType=UniRatio + interpolations=Linear + meanFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +AddTop5Eval=[ + action=edit + CurModel=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet + NewModel=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet.Top5 + editPath=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/add_top5_layer.mel +] +Test=[ + action=test + modelPath=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet.Top5 + minibatchSize=16 + NDLNetworkBuilder=[ + networkDescription=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/AlexNet.ndl + ] + reader=[ + readerType=ImageReader + file=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/val_map.txt + randomize=None + features=[ + width=224 + height=224 + channels=3 + cropType=Center + meanFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +currentDirectory=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData +RunDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu +DataDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData +ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet +OutputDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu +DeviceId=0 +timestamping=true + +05/03/2016 18:06:53: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< + +05/03/2016 18:06:53: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +configparameters: AlexNet.cntk:AddTop5Eval=[ + action=edit + CurModel=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet + NewModel=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet.Top5 + editPath=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/add_top5_layer.mel +] + +configparameters: AlexNet.cntk:command=Train:AddTop5Eval:Test +configparameters: AlexNet.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet +configparameters: AlexNet.cntk:currentDirectory=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData +configparameters: AlexNet.cntk:DataDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/TestData +configparameters: AlexNet.cntk:deviceId=0 +configparameters: AlexNet.cntk:ModelDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models +configparameters: AlexNet.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/Macros.ndl +configparameters: AlexNet.cntk:numMBsToShowResult=100 +configparameters: AlexNet.cntk:OutputDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu +configparameters: AlexNet.cntk:parallelTrain=false +configparameters: AlexNet.cntk:precision=float +configparameters: AlexNet.cntk:RunDir=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu +configparameters: AlexNet.cntk:Test=[ + action=test + 
modelPath=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet.Top5 + minibatchSize=16 + NDLNetworkBuilder=[ + networkDescription=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/AlexNet.ndl + ] + reader=[ + readerType=ImageReader + file=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/val_map.txt + randomize=None + features=[ + width=224 + height=224 + channels=3 + cropType=Center + meanFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] + +configparameters: AlexNet.cntk:timestamping=true +configparameters: AlexNet.cntk:traceLevel=1 +configparameters: AlexNet.cntk:Train=[ + action=train + modelPath=/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet + NDLNetworkBuilder=[ + networkDescription=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/AlexNet.ndl + ] + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + numMBsToShowResult=100 + ] + reader=[ + readerType=ImageReader + file=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/train_map.txt + randomize=Auto + features=[ + width=224 + height=224 + channels=3 + cropType=Random + cropRatio=0.875 + jitterType=UniRatio + interpolations=Linear + meanFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Image/AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] + +05/03/2016 18:06:53: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 18:06:53: Commands: Train AddTop5Eval Test +05/03/2016 18:06:53: Precision = "float" +05/03/2016 18:06:53: CNTKModelPath: /tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet +05/03/2016 18:06:53: CNTKCommandTrainInfo: Train : 3 +05/03/2016 18:06:53: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 + +05/03/2016 18:06:53: ############################################################################## +05/03/2016 18:06:53: # # +05/03/2016 18:06:53: # Action "train" # +05/03/2016 18:06:53: # # +05/03/2016 18:06:53: ############################################################################## + +05/03/2016 18:06:53: CNTKCommandTrainBegin: Train +NDLBuilder Using GPU 0 + +05/03/2016 18:06:53: Creating virgin network. +SetGaussianRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 + +Post-processing network... + +3 roots: + OutputNodes.z = Plus() + ce = CrossEntropyWithSoftmax() + err = ErrorPrediction() + +Validating network. 48 nodes to process in pass 1. 
+ +Validating --> OutputNodes.W = LearnableParameter() : -> [1000 x 4096] +Validating --> h2.W = LearnableParameter() : -> [4096 x 4096] +Validating --> h1.W = LearnableParameter() : -> [4096 x 6 x 6 x 256] +Validating --> conv5.W = LearnableParameter() : -> [256 x 2304] +Validating --> conv4.W = LearnableParameter() : -> [256 x 3456] +Validating --> conv3.W = LearnableParameter() : -> [384 x 1728] +Validating --> conv2.W = LearnableParameter() : -> [192 x 1600] +Validating --> conv1.W = LearnableParameter() : -> [64 x 363] +Validating --> features = InputValue() : -> [224 x 224 x 3 x *] +Validating --> conv1.c = Convolution (conv1.W, features) : [64 x 363], [224 x 224 x 3 x *] -> [56 x 56 x 64 x *] +Validating --> conv1.b = LearnableParameter() : -> [1 x 1 x 64] +Validating --> conv1.z = Plus (conv1.c, conv1.b) : [56 x 56 x 64 x *], [1 x 1 x 64] -> [56 x 56 x 64 x *] +Validating --> conv1.y = RectifiedLinear (conv1.z) : [56 x 56 x 64 x *] -> [56 x 56 x 64 x *] +Validating --> pool1 = MaxPooling (conv1.y) : [56 x 56 x 64 x *] -> [27 x 27 x 64 x *] +Validating --> conv2.c = Convolution (conv2.W, pool1) : [192 x 1600], [27 x 27 x 64 x *] -> [27 x 27 x 192 x *] +Validating --> conv2.b = LearnableParameter() : -> [1 x 1 x 192] +Validating --> conv2.z = Plus (conv2.c, conv2.b) : [27 x 27 x 192 x *], [1 x 1 x 192] -> [27 x 27 x 192 x *] +Validating --> conv2.y = RectifiedLinear (conv2.z) : [27 x 27 x 192 x *] -> [27 x 27 x 192 x *] +Validating --> pool2 = MaxPooling (conv2.y) : [27 x 27 x 192 x *] -> [13 x 13 x 192 x *] +Validating --> conv3.c = Convolution (conv3.W, pool2) : [384 x 1728], [13 x 13 x 192 x *] -> [13 x 13 x 384 x *] +Validating --> conv3.b = LearnableParameter() : -> [1 x 1 x 384] +Validating --> conv3.z = Plus (conv3.c, conv3.b) : [13 x 13 x 384 x *], [1 x 1 x 384] -> [13 x 13 x 384 x *] +Validating --> conv3.y = RectifiedLinear (conv3.z) : [13 x 13 x 384 x *] -> [13 x 13 x 384 x *] +Validating --> conv4.c = Convolution (conv4.W, conv3.y) : [256 x 3456], [13 x 13 x 384 x *] -> [13 x 13 x 256 x *] +Validating --> conv4.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv4.z = Plus (conv4.c, conv4.b) : [13 x 13 x 256 x *], [1 x 1 x 256] -> [13 x 13 x 256 x *] +Validating --> conv4.y = RectifiedLinear (conv4.z) : [13 x 13 x 256 x *] -> [13 x 13 x 256 x *] +Validating --> conv5.c = Convolution (conv5.W, conv4.y) : [256 x 2304], [13 x 13 x 256 x *] -> [13 x 13 x 256 x *] +Validating --> conv5.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv5.z = Plus (conv5.c, conv5.b) : [13 x 13 x 256 x *], [1 x 1 x 256] -> [13 x 13 x 256 x *] +Validating --> conv5.y = RectifiedLinear (conv5.z) : [13 x 13 x 256 x *] -> [13 x 13 x 256 x *] +Validating --> pool3 = MaxPooling (conv5.y) : [13 x 13 x 256 x *] -> [6 x 6 x 256 x *] +Validating --> h1.t = Times (h1.W, pool3) : [4096 x 6 x 6 x 256], [6 x 6 x 256 x *] -> [4096 x *] +Validating --> h1.b = LearnableParameter() : -> [4096] +Validating --> h1.z = Plus (h1.t, h1.b) : [4096 x *], [4096] -> [4096 x *] +Validating --> h1.y = RectifiedLinear (h1.z) : [4096 x *] -> [4096 x *] +Validating --> h1_d = Dropout (h1.y) : [4096 x *] -> [4096 x *] +Validating --> h2.t = Times (h2.W, h1_d) : [4096 x 4096], [4096 x *] -> [4096 x *] +Validating --> h2.b = LearnableParameter() : -> [4096] +Validating --> h2.z = Plus (h2.t, h2.b) : [4096 x *], [4096] -> [4096 x *] +Validating --> h2.y = RectifiedLinear (h2.z) : [4096 x *] -> [4096 x *] +Validating --> h2_d = Dropout (h2.y) : [4096 x *] -> [4096 x *] +Validating --> OutputNodes.t = Times 
(OutputNodes.W, h2_d) : [1000 x 4096], [4096 x *] -> [1000 x *] +Validating --> OutputNodes.b = LearnableParameter() : -> [1000] +Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [1000 x *], [1000] -> [1000 x *] +Validating --> labels = InputValue() : -> [1000 x *] +Validating --> ce = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [1000 x *], [1000 x *] -> [1] +Validating --> err = ErrorPrediction (labels, OutputNodes.z) : [1000 x *], [1000 x *] -> [1] + +Validating network. 30 nodes to process in pass 2. + + +Validating network, final pass. + + +Using cuDNN convolution engine for geometry: Input: 224 x 224 x 3, Output: 56 x 56 x 64, Kernel: 11 x 11 x 3, Map: 1 x 1 x 64, Stride: 4 x 4 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 56 x 56 x 64, Output: 27 x 27 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 27 x 27 x 64, Output: 27 x 27 x 192, Kernel: 5 x 5 x 64, Map: 1 x 1 x 192, Stride: 1 x 1 x 64, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 27 x 27 x 192, Output: 13 x 13 x 192, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 192, Output: 13 x 13 x 384, Kernel: 3 x 3 x 192, Map: 1 x 1 x 384, Stride: 1 x 1 x 192, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 384, Output: 13 x 13 x 256, Kernel: 3 x 3 x 384, Map: 1 x 1 x 256, Stride: 1 x 1 x 384, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 256, Output: 13 x 13 x 256, Kernel: 3 x 3 x 256, Map: 1 x 1 x 256, Stride: 1 x 1 x 256, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 256, Output: 6 x 6 x 256, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + + +18 out of 48 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. + +05/03/2016 18:06:53: Created model with 48 nodes on GPU 0. + +05/03/2016 18:06:53: Training criterion node(s): +05/03/2016 18:06:53: ce = CrossEntropyWithSoftmax + +05/03/2016 18:06:53: Evaluation criterion node(s): + +05/03/2016 18:06:53: err = ErrorPrediction + + +Allocating matrices for forward and/or backward propagation. 
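The convolution-engine geometry lines above determine every spatial size that appears in the validation passes (224 -> 56 -> 27 -> 13 -> 6). The short Python sketch below is illustrative only and is not part of the baseline; it assumes the usual conventions that AutoPad: (1) pads SAME-style so the per-dimension output is ceil(input / stride), while AutoPad: (0) uses no padding so the output is floor((input - kernel) / stride) + 1, and it also checks that the perplexity reported in the test results is simply exp of the per-sample cross entropy.

    # Illustrative sketch under the assumptions stated above; not part of the CNTK baseline output.
    import math

    def out_dim(in_dim, kernel, stride, auto_pad):
        # AutoPad=1 (SAME-style): out = ceil(in / stride); AutoPad=0: valid convolution/pooling
        return math.ceil(in_dim / stride) if auto_pad else (in_dim - kernel) // stride + 1

    # (name, input, kernel, stride, AutoPad) per spatial dimension, read off the geometry lines above
    layers = [("conv1", 224, 11, 4, True),   # -> 56
              ("pool1",  56,  3, 2, False),  # -> 27
              ("conv2",  27,  5, 1, True),   # -> 27
              ("pool2",  27,  3, 2, False),  # -> 13
              ("conv3",  13,  3, 1, True),   # -> 13
              ("conv4",  13,  3, 1, True),   # -> 13
              ("conv5",  13,  3, 1, True),   # -> 13
              ("pool3",  13,  3, 2, False)]  # -> 6
    for name, i, k, s, p in layers:
        print(name, out_dim(i, k, s, p))

    # Perplexity = exp(per-sample cross entropy), matching the Final Results of the test action below:
    print(math.exp(6.96324823))  # ~= 1057.06

The computed sizes reproduce the 56 x 56 x 64, 27 x 27 x 192, 13 x 13 x 384/256, and 6 x 6 x 256 shapes seen in the validation output for this run.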
+ +Memory Sharing Structure: + +(nil): {[err Gradient[1]] [features Gradient[224 x 224 x 3 x *]] [labels Gradient[1000 x *]] } +0x1eb05c8: {[features Value[224 x 224 x 3 x *]] } +0x27d0c58: {[conv1.W Value[64 x 363]] } +0x27d1a38: {[conv1.b Value[1 x 1 x 64]] } +0x27d32a8: {[conv2.W Value[192 x 1600]] } +0x27d49b8: {[conv2.b Value[1 x 1 x 192]] } +0x27d5c88: {[conv3.W Value[384 x 1728]] } +0x27d7378: {[conv3.b Value[1 x 1 x 384]] } +0x27d8698: {[conv4.W Value[256 x 3456]] } +0x27d9798: {[OutputNodes.b Value[1000]] } +0x27d9b88: {[conv4.b Value[1 x 1 x 256]] } +0x27dadf8: {[conv5.W Value[256 x 2304]] } +0x27dbff8: {[conv5.b Value[1 x 1 x 256]] } +0x27dd778: {[h1.W Value[4096 x 6 x 6 x 256]] } +0x27de688: {[h1.b Value[4096]] } +0x2c0cab8: {[labels Value[1000 x *]] } +0x2ea6e78: {[h2.W Value[4096 x 4096]] } +0x2ea7c18: {[h2.b Value[4096]] } +0x2ea8838: {[OutputNodes.W Value[1000 x 4096]] } +0x7f47b2c352e8: {[conv1.c Gradient[56 x 56 x 64 x *]] [conv1.y Value[56 x 56 x 64 x *]] } +0x7f47b2c35448: {[conv1.W Gradient[64 x 363]] [conv1.z Value[56 x 56 x 64 x *]] } +0x7f47b2c35648: {[conv1.z Gradient[56 x 56 x 64 x *]] [pool1 Value[27 x 27 x 64 x *]] } +0x7f47b2c35948: {[conv1.c Value[56 x 56 x 64 x *]] } +0x7f47b2e95948: {[conv1.b Gradient[1 x 1 x 64]] [conv1.y Gradient[56 x 56 x 64 x *]] } +0x7f47b2e95b08: {[conv2.W Gradient[192 x 1600]] [conv2.z Value[27 x 27 x 192 x *]] } +0x7f47b2e95cc8: {[conv2.c Gradient[27 x 27 x 192 x *]] [conv2.y Value[27 x 27 x 192 x *]] } +0x7f47b2e95e88: {[conv2.z Gradient[27 x 27 x 192 x *]] [pool1 Gradient[27 x 27 x 64 x *]] [pool2 Value[13 x 13 x 192 x *]] } +0x7f47b2e96048: {[conv3.c Value[13 x 13 x 384 x *]] } +0x7f47b2e96208: {[conv2.b Gradient[1 x 1 x 192]] [conv2.y Gradient[27 x 27 x 192 x *]] } +0x7f47b2e963c8: {[conv3.W Gradient[384 x 1728]] [conv3.z Value[13 x 13 x 384 x *]] } +0x7f47b2e96588: {[conv3.c Gradient[13 x 13 x 384 x *]] [conv3.y Value[13 x 13 x 384 x *]] } +0x7f47b2e96748: {[conv4.c Value[13 x 13 x 256 x *]] } +0x7f47b2e96908: {[conv3.z Gradient[13 x 13 x 384 x *]] [pool2 Gradient[13 x 13 x 192 x *]] } +0x7f47b2e96ac8: {[conv4.W Gradient[256 x 3456]] [conv4.z Value[13 x 13 x 256 x *]] } +0x7f47b2e96c88: {[conv4.c Gradient[13 x 13 x 256 x *]] [conv4.y Value[13 x 13 x 256 x *]] } +0x7f47b2e96e48: {[conv5.c Value[13 x 13 x 256 x *]] } +0x7f47b2e97008: {[conv3.b Gradient[1 x 1 x 384]] [conv3.y Gradient[13 x 13 x 384 x *]] [conv4.z Gradient[13 x 13 x 256 x *]] } +0x7f47b2e971c8: {[conv5.W Gradient[256 x 2304]] [conv5.z Value[13 x 13 x 256 x *]] } +0x7f47b2e97388: {[conv5.c Gradient[13 x 13 x 256 x *]] [conv5.y Value[13 x 13 x 256 x *]] } +0x7f47b2e97548: {[conv4.b Gradient[1 x 1 x 256]] [conv4.y Gradient[13 x 13 x 256 x *]] [conv5.z Gradient[13 x 13 x 256 x *]] [pool3 Value[6 x 6 x 256 x *]] } +0x7f47b2e97708: {[conv5.b Gradient[1 x 1 x 256]] [conv5.y Gradient[13 x 13 x 256 x *]] [h1.t Value[4096 x *]] } +0x7f47b2e978c8: {[h1.W Gradient[4096 x 6 x 6 x 256]] [h1.z Value[4096 x *]] } +0x7f47b2e97a88: {[h1.t Gradient[4096 x *]] [h1.y Value[4096 x *]] } +0x7f47b2e97c48: {[h1_d Value[4096 x *]] } +0x7f47b2e97e08: {[h1.z Gradient[4096 x *]] [pool3 Gradient[6 x 6 x 256 x *]] } +0x7f47b2e97fc8: {[h1.b Gradient[4096]] [h1.y Gradient[4096 x *]] [h2.t Value[4096 x *]] } +0x7f47b2e98188: {[h2.W Gradient[4096 x 4096]] [h2.z Value[4096 x *]] } +0x7f47b2e98348: {[h2.t Gradient[4096 x *]] [h2.y Value[4096 x *]] } +0x7f47b2e98508: {[h2_d Value[4096 x *]] } +0x7f47b2e986c8: {[h1_d Gradient[4096 x *]] [h2.z Gradient[4096 x *]] } +0x7f47b2e98888: {[OutputNodes.t 
Value[1000 x *]] [h2.b Gradient[4096]] [h2.y Gradient[4096 x *]] } +0x7f47b2e99428: {[ce Gradient[1]] } +0x7f47b2e995e8: {[OutputNodes.W Gradient[1000 x 4096]] [OutputNodes.z Gradient[1000 x *]] } +0x7f47b2e997a8: {[OutputNodes.t Gradient[1000 x *]] } +0x7f47b2e99968: {[OutputNodes.b Gradient[1000]] } +0x7f47b2e99b28: {[h2_d Gradient[4096 x *]] } +0x7f47b2e9aa08: {[OutputNodes.z Value[1000 x *]] } +0x7f47b2e9abc8: {[ce Value[1]] } +0x7f47b2e9b2f8: {[conv2.c Value[27 x 27 x 192 x *]] } +0x7f47b2ef4ce8: {[err Value[1]] } + +05/03/2016 18:06:53: No PreCompute nodes found, skipping PreCompute step. + +05/03/2016 18:06:55: Starting Epoch 1: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples + +05/03/2016 18:06:55: Starting minibatch loop. +05/03/2016 18:07:02: Epoch[ 1 of 3]-Minibatch[ 1- 100]: ce = 7.41642395 * 1600; err = 1.00000000 * 1600; time = 7.0425s; samplesPerSecond = 227.2 +05/03/2016 18:07:08: Finished Epoch[ 1 of 3]: [Training] ce = 7.22737918 * 2999; err = 0.99966656 * 2999; totalSamplesSeen = 2999; learningRatePerSample = 0.00062499999; epochTime=12.9259s +05/03/2016 18:07:10: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet.1' + +05/03/2016 18:07:13: Starting Epoch 2: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples + +05/03/2016 18:07:13: Starting minibatch loop. +05/03/2016 18:07:19: Epoch[ 2 of 3]-Minibatch[ 1- 100, 100.00%]: ce = 6.90983215 * 1600; err = 1.00000000 * 1600; time = 6.2320s; samplesPerSecond = 256.7 +05/03/2016 18:07:25: Finished Epoch[ 2 of 3]: [Training] ce = 6.91963923 * 2999; err = 0.99866622 * 2999; totalSamplesSeen = 5998; learningRatePerSample = 0.00062499999; epochTime=12.2905s +05/03/2016 18:07:27: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet.2' + +05/03/2016 18:07:29: Starting Epoch 3: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples + +05/03/2016 18:07:29: Starting minibatch loop. +05/03/2016 18:07:36: Epoch[ 3 of 3]-Minibatch[ 1- 100, 100.00%]: ce = 6.87519836 * 1600; err = 0.99937500 * 1600; time = 6.4714s; samplesPerSecond = 247.2 +05/03/2016 18:07:42: Finished Epoch[ 3 of 3]: [Training] ce = 6.88608052 * 2999; err = 0.99833278 * 2999; totalSamplesSeen = 8997; learningRatePerSample = 0.00062499999; epochTime=12.1425s +05/03/2016 18:07:44: SGD: Saving checkpoint model '/tmp/cntk-test-20160503180555.960884/Image_AlexNet@release_gpu/models/AlexNet' +05/03/2016 18:07:46: CNTKCommandTrainEnd: Train + +05/03/2016 18:07:46: Action "train" complete. + + +05/03/2016 18:07:46: ############################################################################## +05/03/2016 18:07:46: # # +05/03/2016 18:07:46: # Action "edit" # +05/03/2016 18:07:46: # # +05/03/2016 18:07:46: ############################################################################## + + +Post-processing network... + +3 roots: + OutputNodes.z = Plus() + ce = CrossEntropyWithSoftmax() + err = ErrorPrediction() + +Validating network. 48 nodes to process in pass 1. 
+ +Validating --> OutputNodes.W = LearnableParameter() : -> [1000 x 4096] +Validating --> h2.W = LearnableParameter() : -> [4096 x 4096] +Validating --> h1.W = LearnableParameter() : -> [4096 x 6 x 6 x 256] +Validating --> conv5.W = LearnableParameter() : -> [256 x 2304] +Validating --> conv4.W = LearnableParameter() : -> [256 x 3456] +Validating --> conv3.W = LearnableParameter() : -> [384 x 1728] +Validating --> conv2.W = LearnableParameter() : -> [192 x 1600] +Validating --> conv1.W = LearnableParameter() : -> [64 x 363] +Validating --> features = InputValue() : -> [224 x 224 x 3 x *1] +Validating --> conv1.c = Convolution (conv1.W, features) : [64 x 363], [224 x 224 x 3 x *1] -> [56 x 56 x 64 x *1] +Validating --> conv1.b = LearnableParameter() : -> [1 x 1 x 64] +Validating --> conv1.z = Plus (conv1.c, conv1.b) : [56 x 56 x 64 x *1], [1 x 1 x 64] -> [56 x 56 x 64 x *1] +Validating --> conv1.y = RectifiedLinear (conv1.z) : [56 x 56 x 64 x *1] -> [56 x 56 x 64 x *1] +Validating --> pool1 = MaxPooling (conv1.y) : [56 x 56 x 64 x *1] -> [27 x 27 x 64 x *1] +Validating --> conv2.c = Convolution (conv2.W, pool1) : [192 x 1600], [27 x 27 x 64 x *1] -> [27 x 27 x 192 x *1] +Validating --> conv2.b = LearnableParameter() : -> [1 x 1 x 192] +Validating --> conv2.z = Plus (conv2.c, conv2.b) : [27 x 27 x 192 x *1], [1 x 1 x 192] -> [27 x 27 x 192 x *1] +Validating --> conv2.y = RectifiedLinear (conv2.z) : [27 x 27 x 192 x *1] -> [27 x 27 x 192 x *1] +Validating --> pool2 = MaxPooling (conv2.y) : [27 x 27 x 192 x *1] -> [13 x 13 x 192 x *1] +Validating --> conv3.c = Convolution (conv3.W, pool2) : [384 x 1728], [13 x 13 x 192 x *1] -> [13 x 13 x 384 x *1] +Validating --> conv3.b = LearnableParameter() : -> [1 x 1 x 384] +Validating --> conv3.z = Plus (conv3.c, conv3.b) : [13 x 13 x 384 x *1], [1 x 1 x 384] -> [13 x 13 x 384 x *1] +Validating --> conv3.y = RectifiedLinear (conv3.z) : [13 x 13 x 384 x *1] -> [13 x 13 x 384 x *1] +Validating --> conv4.c = Convolution (conv4.W, conv3.y) : [256 x 3456], [13 x 13 x 384 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv4.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv4.z = Plus (conv4.c, conv4.b) : [13 x 13 x 256 x *1], [1 x 1 x 256] -> [13 x 13 x 256 x *1] +Validating --> conv4.y = RectifiedLinear (conv4.z) : [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv5.c = Convolution (conv5.W, conv4.y) : [256 x 2304], [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv5.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv5.z = Plus (conv5.c, conv5.b) : [13 x 13 x 256 x *1], [1 x 1 x 256] -> [13 x 13 x 256 x *1] +Validating --> conv5.y = RectifiedLinear (conv5.z) : [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> pool3 = MaxPooling (conv5.y) : [13 x 13 x 256 x *1] -> [6 x 6 x 256 x *1] +Validating --> h1.t = Times (h1.W, pool3) : [4096 x 6 x 6 x 256], [6 x 6 x 256 x *1] -> [4096 x *1] +Validating --> h1.b = LearnableParameter() : -> [4096] +Validating --> h1.z = Plus (h1.t, h1.b) : [4096 x *1], [4096] -> [4096 x *1] +Validating --> h1.y = RectifiedLinear (h1.z) : [4096 x *1] -> [4096 x *1] +Validating --> h1_d = Dropout (h1.y) : [4096 x *1] -> [4096 x *1] +Validating --> h2.t = Times (h2.W, h1_d) : [4096 x 4096], [4096 x *1] -> [4096 x *1] +Validating --> h2.b = LearnableParameter() : -> [4096] +Validating --> h2.z = Plus (h2.t, h2.b) : [4096 x *1], [4096] -> [4096 x *1] +Validating --> h2.y = RectifiedLinear (h2.z) : [4096 x *1] -> [4096 x *1] +Validating --> h2_d = Dropout (h2.y) : [4096 x *1] 
-> [4096 x *1] +Validating --> OutputNodes.t = Times (OutputNodes.W, h2_d) : [1000 x 4096], [4096 x *1] -> [1000 x *1] +Validating --> OutputNodes.b = LearnableParameter() : -> [1000] +Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [1000 x *1], [1000] -> [1000 x *1] +Validating --> labels = InputValue() : -> [1000 x *1] +Validating --> ce = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [1000 x *1], [1000 x *1] -> [1] +Validating --> err = ErrorPrediction (labels, OutputNodes.z) : [1000 x *1], [1000 x *1] -> [1] + +Validating network. 30 nodes to process in pass 2. + + +Validating network, final pass. + + +Using GEMM convolution engine for geometry: Input: 224 x 224 x 3, Output: 56 x 56 x 64, Kernel: 11 x 11 x 3, Map: 1 x 1 x 64, Stride: 4 x 4 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 56 x 56 x 64, Output: 27 x 27 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 27 x 27 x 64, Output: 27 x 27 x 192, Kernel: 5 x 5 x 64, Map: 1 x 1 x 192, Stride: 1 x 1 x 64, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 27 x 27 x 192, Output: 13 x 13 x 192, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 13 x 13 x 192, Output: 13 x 13 x 384, Kernel: 3 x 3 x 192, Map: 1 x 1 x 384, Stride: 1 x 1 x 192, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 13 x 13 x 384, Output: 13 x 13 x 256, Kernel: 3 x 3 x 384, Map: 1 x 1 x 256, Stride: 1 x 1 x 384, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 13 x 13 x 256, Output: 13 x 13 x 256, Kernel: 3 x 3 x 256, Map: 1 x 1 x 256, Stride: 1 x 1 x 256, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 13 x 13 x 256, Output: 6 x 6 x 256, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + + +18 out of 48 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. + + +Post-processing network... + +4 roots: + OutputNodes.z = Plus() + ce = CrossEntropyWithSoftmax() + err = ErrorPrediction() + errTop5 = ErrorPrediction() + +Validating network. 50 nodes to process in pass 1. 
+ +Validating --> OutputNodes.W = LearnableParameter() : -> [1000 x 4096] +Validating --> h2.W = LearnableParameter() : -> [4096 x 4096] +Validating --> h1.W = LearnableParameter() : -> [4096 x 6 x 6 x 256] +Validating --> conv5.W = LearnableParameter() : -> [256 x 2304] +Validating --> conv4.W = LearnableParameter() : -> [256 x 3456] +Validating --> conv3.W = LearnableParameter() : -> [384 x 1728] +Validating --> conv2.W = LearnableParameter() : -> [192 x 1600] +Validating --> conv1.W = LearnableParameter() : -> [64 x 363] +Validating --> features = InputValue() : -> [224 x 224 x 3 x *1] +Validating --> conv1.c = Convolution (conv1.W, features) : [64 x 363], [224 x 224 x 3 x *1] -> [56 x 56 x 64 x *1] +Validating --> conv1.b = LearnableParameter() : -> [1 x 1 x 64] +Validating --> conv1.z = Plus (conv1.c, conv1.b) : [56 x 56 x 64 x *1], [1 x 1 x 64] -> [56 x 56 x 64 x *1] +Validating --> conv1.y = RectifiedLinear (conv1.z) : [56 x 56 x 64 x *1] -> [56 x 56 x 64 x *1] +Validating --> pool1 = MaxPooling (conv1.y) : [56 x 56 x 64 x *1] -> [27 x 27 x 64 x *1] +Validating --> conv2.c = Convolution (conv2.W, pool1) : [192 x 1600], [27 x 27 x 64 x *1] -> [27 x 27 x 192 x *1] +Validating --> conv2.b = LearnableParameter() : -> [1 x 1 x 192] +Validating --> conv2.z = Plus (conv2.c, conv2.b) : [27 x 27 x 192 x *1], [1 x 1 x 192] -> [27 x 27 x 192 x *1] +Validating --> conv2.y = RectifiedLinear (conv2.z) : [27 x 27 x 192 x *1] -> [27 x 27 x 192 x *1] +Validating --> pool2 = MaxPooling (conv2.y) : [27 x 27 x 192 x *1] -> [13 x 13 x 192 x *1] +Validating --> conv3.c = Convolution (conv3.W, pool2) : [384 x 1728], [13 x 13 x 192 x *1] -> [13 x 13 x 384 x *1] +Validating --> conv3.b = LearnableParameter() : -> [1 x 1 x 384] +Validating --> conv3.z = Plus (conv3.c, conv3.b) : [13 x 13 x 384 x *1], [1 x 1 x 384] -> [13 x 13 x 384 x *1] +Validating --> conv3.y = RectifiedLinear (conv3.z) : [13 x 13 x 384 x *1] -> [13 x 13 x 384 x *1] +Validating --> conv4.c = Convolution (conv4.W, conv3.y) : [256 x 3456], [13 x 13 x 384 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv4.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv4.z = Plus (conv4.c, conv4.b) : [13 x 13 x 256 x *1], [1 x 1 x 256] -> [13 x 13 x 256 x *1] +Validating --> conv4.y = RectifiedLinear (conv4.z) : [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv5.c = Convolution (conv5.W, conv4.y) : [256 x 2304], [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv5.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv5.z = Plus (conv5.c, conv5.b) : [13 x 13 x 256 x *1], [1 x 1 x 256] -> [13 x 13 x 256 x *1] +Validating --> conv5.y = RectifiedLinear (conv5.z) : [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> pool3 = MaxPooling (conv5.y) : [13 x 13 x 256 x *1] -> [6 x 6 x 256 x *1] +Validating --> h1.t = Times (h1.W, pool3) : [4096 x 6 x 6 x 256], [6 x 6 x 256 x *1] -> [4096 x *1] +Validating --> h1.b = LearnableParameter() : -> [4096] +Validating --> h1.z = Plus (h1.t, h1.b) : [4096 x *1], [4096] -> [4096 x *1] +Validating --> h1.y = RectifiedLinear (h1.z) : [4096 x *1] -> [4096 x *1] +Validating --> h1_d = Dropout (h1.y) : [4096 x *1] -> [4096 x *1] +Validating --> h2.t = Times (h2.W, h1_d) : [4096 x 4096], [4096 x *1] -> [4096 x *1] +Validating --> h2.b = LearnableParameter() : -> [4096] +Validating --> h2.z = Plus (h2.t, h2.b) : [4096 x *1], [4096] -> [4096 x *1] +Validating --> h2.y = RectifiedLinear (h2.z) : [4096 x *1] -> [4096 x *1] +Validating --> h2_d = Dropout (h2.y) : [4096 x *1] 
-> [4096 x *1] +Validating --> OutputNodes.t = Times (OutputNodes.W, h2_d) : [1000 x 4096], [4096 x *1] -> [1000 x *1] +Validating --> OutputNodes.b = LearnableParameter() : -> [1000] +Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [1000 x *1], [1000] -> [1000 x *1] +Validating --> labels = InputValue() : -> [1000 x *1] +Validating --> ce = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [1000 x *1], [1000 x *1] -> [1] +Validating --> err = ErrorPrediction (labels, OutputNodes.z) : [1000 x *1], [1000 x *1] -> [1] +Validating --> unnamed137 = LearnableParameter() : -> [1 x 1] +Validating --> errTop5 = ErrorPrediction (labels, OutputNodes.z, unnamed137) : [1000 x *1], [1000 x *1], [1 x 1] -> [1] + +Validating network. 31 nodes to process in pass 2. + + +Validating network, final pass. + + + +20 out of 50 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. + + +05/03/2016 18:07:51: Action "edit" complete. + + +05/03/2016 18:07:51: ############################################################################## +05/03/2016 18:07:51: # # +05/03/2016 18:07:51: # Action "test" # +05/03/2016 18:07:51: # # +05/03/2016 18:07:51: ############################################################################## + + +Post-processing network... + +4 roots: + OutputNodes.z = Plus() + ce = CrossEntropyWithSoftmax() + err = ErrorPrediction() + errTop5 = ErrorPrediction() + +Validating network. 50 nodes to process in pass 1. + +Validating --> OutputNodes.W = LearnableParameter() : -> [1000 x 4096] +Validating --> h2.W = LearnableParameter() : -> [4096 x 4096] +Validating --> h1.W = LearnableParameter() : -> [4096 x 6 x 6 x 256] +Validating --> conv5.W = LearnableParameter() : -> [256 x 2304] +Validating --> conv4.W = LearnableParameter() : -> [256 x 3456] +Validating --> conv3.W = LearnableParameter() : -> [384 x 1728] +Validating --> conv2.W = LearnableParameter() : -> [192 x 1600] +Validating --> conv1.W = LearnableParameter() : -> [64 x 363] +Validating --> features = InputValue() : -> [224 x 224 x 3 x *2] +Validating --> conv1.c = Convolution (conv1.W, features) : [64 x 363], [224 x 224 x 3 x *2] -> [56 x 56 x 64 x *2] +Validating --> conv1.b = LearnableParameter() : -> [1 x 1 x 64] +Validating --> conv1.z = Plus (conv1.c, conv1.b) : [56 x 56 x 64 x *2], [1 x 1 x 64] -> [56 x 56 x 64 x *2] +Validating --> conv1.y = RectifiedLinear (conv1.z) : [56 x 56 x 64 x *2] -> [56 x 56 x 64 x *2] +Validating --> pool1 = MaxPooling (conv1.y) : [56 x 56 x 64 x *2] -> [27 x 27 x 64 x *2] +Validating --> conv2.c = Convolution (conv2.W, pool1) : [192 x 1600], [27 x 27 x 64 x *2] -> [27 x 27 x 192 x *2] +Validating --> conv2.b = LearnableParameter() : -> [1 x 1 x 192] +Validating --> conv2.z = Plus (conv2.c, conv2.b) : [27 x 27 x 192 x *2], [1 x 1 x 192] -> [27 x 27 x 192 x *2] +Validating --> conv2.y = RectifiedLinear (conv2.z) : [27 x 27 x 192 x *2] -> [27 x 27 x 192 x *2] +Validating --> pool2 = MaxPooling (conv2.y) : [27 x 27 x 192 x *2] -> [13 x 13 x 192 x *2] +Validating --> conv3.c = Convolution (conv3.W, pool2) : [384 x 1728], [13 x 13 x 192 x *2] -> [13 x 13 x 384 x *2] +Validating --> conv3.b = LearnableParameter() : -> [1 x 1 x 384] +Validating --> conv3.z = Plus (conv3.c, conv3.b) : [13 x 13 x 384 x *2], [1 x 1 x 384] -> [13 x 13 x 384 x *2] +Validating --> conv3.y = RectifiedLinear (conv3.z) : [13 x 13 x 384 x *2] -> [13 x 13 x 384 x *2] +Validating --> conv4.c = Convolution (conv4.W, conv3.y) : [256 x 3456], [13 x 13 x 384 x *2] -> [13 x 13 x 
256 x *2] +Validating --> conv4.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv4.z = Plus (conv4.c, conv4.b) : [13 x 13 x 256 x *2], [1 x 1 x 256] -> [13 x 13 x 256 x *2] +Validating --> conv4.y = RectifiedLinear (conv4.z) : [13 x 13 x 256 x *2] -> [13 x 13 x 256 x *2] +Validating --> conv5.c = Convolution (conv5.W, conv4.y) : [256 x 2304], [13 x 13 x 256 x *2] -> [13 x 13 x 256 x *2] +Validating --> conv5.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv5.z = Plus (conv5.c, conv5.b) : [13 x 13 x 256 x *2], [1 x 1 x 256] -> [13 x 13 x 256 x *2] +Validating --> conv5.y = RectifiedLinear (conv5.z) : [13 x 13 x 256 x *2] -> [13 x 13 x 256 x *2] +Validating --> pool3 = MaxPooling (conv5.y) : [13 x 13 x 256 x *2] -> [6 x 6 x 256 x *2] +Validating --> h1.t = Times (h1.W, pool3) : [4096 x 6 x 6 x 256], [6 x 6 x 256 x *2] -> [4096 x *2] +Validating --> h1.b = LearnableParameter() : -> [4096] +Validating --> h1.z = Plus (h1.t, h1.b) : [4096 x *2], [4096] -> [4096 x *2] +Validating --> h1.y = RectifiedLinear (h1.z) : [4096 x *2] -> [4096 x *2] +Validating --> h1_d = Dropout (h1.y) : [4096 x *2] -> [4096 x *2] +Validating --> h2.t = Times (h2.W, h1_d) : [4096 x 4096], [4096 x *2] -> [4096 x *2] +Validating --> h2.b = LearnableParameter() : -> [4096] +Validating --> h2.z = Plus (h2.t, h2.b) : [4096 x *2], [4096] -> [4096 x *2] +Validating --> h2.y = RectifiedLinear (h2.z) : [4096 x *2] -> [4096 x *2] +Validating --> h2_d = Dropout (h2.y) : [4096 x *2] -> [4096 x *2] +Validating --> OutputNodes.t = Times (OutputNodes.W, h2_d) : [1000 x 4096], [4096 x *2] -> [1000 x *2] +Validating --> OutputNodes.b = LearnableParameter() : -> [1000] +Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [1000 x *2], [1000] -> [1000 x *2] +Validating --> labels = InputValue() : -> [1000 x *2] +Validating --> ce = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [1000 x *2], [1000 x *2] -> [1] +Validating --> err = ErrorPrediction (labels, OutputNodes.z) : [1000 x *2], [1000 x *2] -> [1] +Validating --> unnamed137 = LearnableParameter() : -> [1 x 1] +Validating --> errTop5 = ErrorPrediction (labels, OutputNodes.z, unnamed137) : [1000 x *2], [1000 x *2], [1 x 1] -> [1] + +Validating network. 31 nodes to process in pass 2. + + +Validating network, final pass. + + +Using cuDNN convolution engine for geometry: Input: 224 x 224 x 3, Output: 56 x 56 x 64, Kernel: 11 x 11 x 3, Map: 1 x 1 x 64, Stride: 4 x 4 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 56 x 56 x 64, Output: 27 x 27 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 27 x 27 x 64, Output: 27 x 27 x 192, Kernel: 5 x 5 x 64, Map: 1 x 1 x 192, Stride: 1 x 1 x 64, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 27 x 27 x 192, Output: 13 x 13 x 192, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 192, Output: 13 x 13 x 384, Kernel: 3 x 3 x 192, Map: 1 x 1 x 384, Stride: 1 x 1 x 192, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 384, Output: 13 x 13 x 256, Kernel: 3 x 3 x 384, Map: 1 x 1 x 256, Stride: 1 x 1 x 384, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. 
+ +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 256, Output: 13 x 13 x 256, Kernel: 3 x 3 x 256, Map: 1 x 1 x 256, Stride: 1 x 1 x 256, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 256, Output: 6 x 6 x 256, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + + +20 out of 50 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. + +evalNodeNames are not specified, using all the default evalnodes and training criterion nodes. + + +Allocating matrices for forward and/or backward propagation. + +Memory Sharing Structure: + +(nil): {[OutputNodes.W Gradient[1000 x 4096]] [OutputNodes.b Gradient[1000]] [OutputNodes.t Gradient[1000 x *2]] [OutputNodes.z Gradient[1000 x *2]] [ce Gradient[1]] [conv1.W Gradient[64 x 363]] [conv1.b Gradient[1 x 1 x 64]] [conv1.c Gradient[56 x 56 x 64 x *2]] [conv1.y Gradient[56 x 56 x 64 x *2]] [conv1.z Gradient[56 x 56 x 64 x *2]] [conv2.W Gradient[192 x 1600]] [conv2.b Gradient[1 x 1 x 192]] [conv2.c Gradient[27 x 27 x 192 x *2]] [conv2.y Gradient[27 x 27 x 192 x *2]] [conv2.z Gradient[27 x 27 x 192 x *2]] [conv3.W Gradient[384 x 1728]] [conv3.b Gradient[1 x 1 x 384]] [conv3.c Gradient[13 x 13 x 384 x *2]] [conv3.y Gradient[13 x 13 x 384 x *2]] [conv3.z Gradient[13 x 13 x 384 x *2]] [conv4.W Gradient[256 x 3456]] [conv4.b Gradient[1 x 1 x 256]] [conv4.c Gradient[13 x 13 x 256 x *2]] [conv4.y Gradient[13 x 13 x 256 x *2]] [conv4.z Gradient[13 x 13 x 256 x *2]] [conv5.W Gradient[256 x 2304]] [conv5.b Gradient[1 x 1 x 256]] [conv5.c Gradient[13 x 13 x 256 x *2]] [conv5.y Gradient[13 x 13 x 256 x *2]] [conv5.z Gradient[13 x 13 x 256 x *2]] [err Gradient[1]] [errTop5 Gradient[1]] [features Gradient[224 x 224 x 3 x *2]] [h1.W Gradient[4096 x 6 x 6 x 256]] [h1.b Gradient[4096]] [h1.t Gradient[4096 x *2]] [h1.y Gradient[4096 x *2]] [h1.z Gradient[4096 x *2]] [h1_d Gradient[4096 x *2]] [h2.W Gradient[4096 x 4096]] [h2.b Gradient[4096]] [h2.t Gradient[4096 x *2]] [h2.y Gradient[4096 x *2]] [h2.z Gradient[4096 x *2]] [h2_d Gradient[4096 x *2]] [labels Gradient[1000 x *2]] [pool1 Gradient[27 x 27 x 64 x *2]] [pool2 Gradient[13 x 13 x 192 x *2]] [pool3 Gradient[6 x 6 x 256 x *2]] [unnamed137 Gradient[1 x 1]] } +0x7f479db02088: {[conv1.b Value[1 x 1 x 64]] } +0x7f479db2c418: {[conv1.W Value[64 x 363]] } +0x7f479db2d7a8: {[conv2.W Value[192 x 1600]] } +0x7f479db2dae8: {[conv2.b Value[1 x 1 x 192]] } +0x7f479db2fdd8: {[conv3.W Value[384 x 1728]] } +0x7f479db30118: {[conv3.b Value[1 x 1 x 384]] } +0x7f479db30908: {[conv4.b Value[1 x 1 x 256]] } +0x7f479db33f08: {[conv4.W Value[256 x 3456]] } +0x7f479db35358: {[conv5.b Value[1 x 1 x 256]] } +0x7f479db36608: {[conv5.W Value[256 x 2304]] } +0x7f479db37d68: {[features Value[224 x 224 x 3 x *2]] } +0x7f479db38858: {[h1.W Value[4096 x 6 x 6 x 256]] } +0x7f479db38b98: {[h1.b Value[4096]] } +0x7f479db3aa98: {[h2.b Value[4096]] } +0x7f479db3b5d8: {[h2.W Value[4096 x 4096]] } +0x7f479db3ca98: {[labels Value[1000 x *2]] } +0x7f479db3de18: {[OutputNodes.b Value[1000]] } +0x7f479db3e628: {[OutputNodes.W Value[1000 x 4096]] } +0x7f479db40748: {[unnamed137 Value[1 x 1]] } +0x7f479db413e8: {[errTop5 Value[1]] } +0x7f479db42138: {[ce Value[1]] } +0x7f479db48378: {[err Value[1]] } +0x7f479db53e18: {[pool3 Value[6 x 6 x 256 x *2]] } +0x7f479db53fd8: {[h1.t Value[4096 x *2]] } +0x7f479db54198: {[h1.z Value[4096 x *2]] } +0x7f479db54358: {[h1.y Value[4096 x *2]] } 
+0x7f479db54518: {[h1_d Value[4096 x *2]] } +0x7f479db54898: {[h2.t Value[4096 x *2]] } +0x7f479db54a58: {[h2.z Value[4096 x *2]] } +0x7f479db54c18: {[h2.y Value[4096 x *2]] } +0x7f479db54dd8: {[h2_d Value[4096 x *2]] } +0x7f479db55158: {[OutputNodes.t Value[1000 x *2]] } +0x7f479db55318: {[OutputNodes.z Value[1000 x *2]] } +0x7f47a644f258: {[conv1.z Value[56 x 56 x 64 x *2]] } +0x7f47a644f558: {[conv1.c Value[56 x 56 x 64 x *2]] } +0x7f47a6450068: {[conv1.y Value[56 x 56 x 64 x *2]] } +0x7f47a64506b8: {[pool1 Value[27 x 27 x 64 x *2]] } +0x7f47a6450878: {[conv2.c Value[27 x 27 x 192 x *2]] } +0x7f47a6450bf8: {[conv2.z Value[27 x 27 x 192 x *2]] } +0x7f47a6450db8: {[conv2.y Value[27 x 27 x 192 x *2]] } +0x7f47a6450f78: {[pool2 Value[13 x 13 x 192 x *2]] } +0x7f47a6451138: {[conv3.c Value[13 x 13 x 384 x *2]] } +0x7f47a64514b8: {[conv3.z Value[13 x 13 x 384 x *2]] } +0x7f47a6451678: {[conv3.y Value[13 x 13 x 384 x *2]] } +0x7f47a6451838: {[conv4.c Value[13 x 13 x 256 x *2]] } +0x7f47a6451bb8: {[conv4.z Value[13 x 13 x 256 x *2]] } +0x7f47a6451d78: {[conv4.y Value[13 x 13 x 256 x *2]] } +0x7f47a6451f38: {[conv5.c Value[13 x 13 x 256 x *2]] } +0x7f47a64522b8: {[conv5.z Value[13 x 13 x 256 x *2]] } +0x7f47a6452478: {[conv5.y Value[13 x 13 x 256 x *2]] } + +05/03/2016 18:07:55: Final Results: Minibatch[1-32]: err = 0.99800000 * 500; errTop5 = 0.99400000 * 500; ce = 6.96324823 * 500; perplexity = 1057.06156985 + +05/03/2016 18:07:55: Action "test" complete. + +05/03/2016 18:07:55: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.windows.debug.gpu.txt b/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.windows.debug.gpu.txt new file mode 100644 index 000000000..77b5718b3 --- /dev/null +++ b/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.windows.debug.gpu.txt @@ -0,0 +1,2984 @@ +Copying test data to local directory +=== Running /cygdrive/c/repo/cntk_github5/cntk/x64/debug/cntk.exe configFile=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/AlexNet.config currentDirectory=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu\TestData RunDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu DataDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu\TestData ConfigDir=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet DeviceId=0 +------------------------------------------------------------------- +Build info: + + Built time: Jan 28 2016 13:36:22 + Last modified date: Thu Jan 28 13:25:59 2016 + CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0 + CUB_PATH: c:\Tools\cub-1.4.1\ + CUDNN_PATH: c:\Tools\cudnn-4.0\cuda + Build Branch: HEAD + Build SHA1: 31a164602c629d10741761443e6e46b2ab787ad5 + Built by eldak on ELDAK-0 + Build Path: c:\repo\cntk_github5\CNTK\Source\CNTK\ +------------------------------------------------------------------- +------------------------------------------------------------------- +Build info: + + Built time: Jan 28 2016 13:36:22 + Last modified date: Thu Jan 28 13:25:59 2016 + CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0 + CUB_PATH: c:\Tools\cub-1.4.1\ + CUDNN_PATH: c:\Tools\cudnn-4.0\cuda + Build Branch: HEAD + Build SHA1: 31a164602c629d10741761443e6e46b2ab787ad5 + Built by eldak on ELDAK-0 + Build Path: c:\repo\cntk_github5\CNTK\Source\CNTK\ +------------------------------------------------------------------- +running on ELDAK-0 at 2016/01/28 14:53:56 +command line: 
+C:\repo\cntk_github5\cntk\x64\debug\cntk.exe configFile=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/AlexNet.config currentDirectory=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu\TestData RunDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu DataDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu\TestData ConfigDir=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet DeviceId=0 + +>>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> +ModelDir = "$RunDir$/models" +ndlMacros=$ConfigDir$/Macros.ndl +precision=float +deviceId=Auto +command=Train:AddTop5Eval:Test +parallelTrain=false +traceLevel=1 +numMBsToShowResult=100 +Train=[ + action=train + modelPath=$ModelDir$/AlexNet + NDLNetworkBuilder=[ + networkDescription=$ConfigDir$/AlexNet.ndl + ] + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + numMBsToShowResult=100 + ] + reader=[ + readerType=ImageReader + file=$ConfigDir$/train_map.txt + randomize=Auto + numCPUThreads = 1 + features=[ + width=224 + height=224 + channels=3 + cropType=Random + cropRatio=0.875 + jitterType=UniRatio + interpolations=Linear + meanFile=$ConfigDir$/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +AddTop5Eval=[ + action=edit + CurModel=$ModelDir$/AlexNet + NewModel=$ModelDir$/AlexNet.Top5 + editPath=$ConfigDir$/add_top5_layer.mel +] +Test=[ + action=test + modelPath=$ModelDir$/AlexNet.Top5 + minibatchSize=16 + NDLNetworkBuilder=[ + networkDescription=$ConfigDir$/AlexNet.ndl + ] + reader=[ + readerType=ImageReader + numCPUThreads = 1 + file=$ConfigDir$/val_map.txt + randomize=None + features=[ + width=224 + height=224 + channels=3 + cropType=Center + meanFile=$ConfigDir$/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +currentDirectory=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu\TestData +RunDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu +DataDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu\TestData +ConfigDir=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet +DeviceId=0 + +<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< + +>>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +ModelDir = "F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models" +ndlMacros=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/Macros.ndl +precision=float +deviceId=Auto +command=Train:AddTop5Eval:Test +parallelTrain=false +traceLevel=1 +numMBsToShowResult=100 +Train=[ + action=train + modelPath=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models/AlexNet + NDLNetworkBuilder=[ + networkDescription=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/AlexNet.ndl + ] + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + 
numMBsToShowResult=100 + ] + reader=[ + readerType=ImageReader + file=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/train_map.txt + randomize=Auto + numCPUThreads = 1 + features=[ + width=224 + height=224 + channels=3 + cropType=Random + cropRatio=0.875 + jitterType=UniRatio + interpolations=Linear + meanFile=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +AddTop5Eval=[ + action=edit + CurModel=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models/AlexNet + NewModel=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models/AlexNet.Top5 + editPath=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/add_top5_layer.mel +] +Test=[ + action=test + modelPath=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models/AlexNet.Top5 + minibatchSize=16 + NDLNetworkBuilder=[ + networkDescription=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/AlexNet.ndl + ] + reader=[ + readerType=ImageReader + numCPUThreads = 1 + file=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/val_map.txt + randomize=None + features=[ + width=224 + height=224 + channels=3 + cropType=Center + meanFile=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] +currentDirectory=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu\TestData +RunDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu +DataDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu\TestData +ConfigDir=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet +DeviceId=0 + +<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< + +>>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> +configparameters: AlexNet.config:AddTop5Eval=[ + action=edit + CurModel=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models/AlexNet + NewModel=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models/AlexNet.Top5 + editPath=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/add_top5_layer.mel +] + +configparameters: AlexNet.config:command=Train:AddTop5Eval:Test +configparameters: AlexNet.config:ConfigDir=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet +configparameters: AlexNet.config:currentDirectory=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu\TestData +configparameters: AlexNet.config:DataDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu\TestData +configparameters: AlexNet.config:deviceId=0 +configparameters: AlexNet.config:ModelDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models +configparameters: AlexNet.config:ndlMacros=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/Macros.ndl +configparameters: AlexNet.config:numMBsToShowResult=100 +configparameters: AlexNet.config:parallelTrain=false +configparameters: AlexNet.config:precision=float +configparameters: AlexNet.config:RunDir=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu +configparameters: AlexNet.config:Test=[ + action=test + modelPath=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models/AlexNet.Top5 + minibatchSize=16 + NDLNetworkBuilder=[ + networkDescription=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/AlexNet.ndl + ] + 
reader=[ + readerType=ImageReader + numCPUThreads = 1 + file=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/val_map.txt + randomize=None + features=[ + width=224 + height=224 + channels=3 + cropType=Center + meanFile=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] + +configparameters: AlexNet.config:traceLevel=1 +configparameters: AlexNet.config:Train=[ + action=train + modelPath=F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models/AlexNet + NDLNetworkBuilder=[ + networkDescription=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/AlexNet.ndl + ] + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + numMBsToShowResult=100 + ] + reader=[ + readerType=ImageReader + file=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/train_map.txt + randomize=Auto + numCPUThreads = 1 + features=[ + width=224 + height=224 + channels=3 + cropType=Random + cropRatio=0.875 + jitterType=UniRatio + interpolations=Linear + meanFile=C:\repo\cntk_github5\cntk\tests\EndToEndTests\Image\AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] + +<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +command: Train AddTop5Eval Test +precision = float +CNTKModelPath: F:\cygwin64\tmp\cntk-test-20160128155330.820671\Image_AlexNet@debug_gpu/models/AlexNet +CNTKCommandTrainInfo: Train : 3 +CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 +CNTKCommandTrainBegin: Train +NDLBuilder Using GPU 0 +Microsoft::MSR::CNTK::GPUMatrix::SetGaussianRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 + +Post-processing network... + +3 roots: + OutputNodes.z = Plus + Err = ErrorPrediction + CE = CrossEntropyWithSoftmax +FormNestedNetwork: WARNING: Was called twice for OutputNodes.z Plus operation +FormNestedNetwork: WARNING: Was called twice for Err ErrorPrediction operation +FormNestedNetwork: WARNING: Was called twice for CE CrossEntropyWithSoftmax operation + + +Validating for node OutputNodes.z. 45 nodes to process in pass 1. 
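[Editor's illustration, not part of the patch.] The SGD block dumped above uses CNTK's colon-separated schedule syntax (value*epochCount:...). The snippet below is a minimal plain-Python sketch, not CNTK code: it expands such schedule strings and reproduces the per-sample figures the training log prints further down, assuming the usual conversions per-sample rate = per-MB rate / minibatchSize and momentum time constant = minibatchSize / -ln(momentumPerMB); the helper name expand_schedule is illustrative only.

    import math

    def expand_schedule(spec, num_epochs):
        """Expand e.g. '0.01*20:0.003*12:0.0003' into one value per epoch."""
        values = []
        for part in spec.split(":"):
            if "*" in part:
                value, count = part.split("*")
                values += [float(value)] * int(count)
            else:
                # a bare value holds for all remaining epochs
                values += [float(part)] * max(num_epochs - len(values), 0)
        # pad with the last value if the spec is shorter than num_epochs
        return (values + [values[-1]] * num_epochs)[:num_epochs]

    minibatch_size = 16
    lr_per_mb = expand_schedule("0.01*20:0.003*12:0.001*28:0.0003", 64)
    dropout   = expand_schedule("0*5:0.5", 64)

    print(lr_per_mb[0] / minibatch_size)      # 0.000625, as in "learning rate per sample" below
    print(minibatch_size / -math.log(0.9))    # ~151.9, as in "momentum as time constant" below
    print(dropout[:7])                        # 0.0 for the first five epochs, then 0.5
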
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z. 28 nodes to process in pass 2. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z, final verification. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +16 out of 45 nodes do not share the minibatch layout with the input data. + + +Validating for node Err. 47 nodes to process in pass 1. 
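[Editor's illustration, not part of the patch.] Each validation pass above reports the same layer geometry. As a sketch under stated assumptions (kernel extents inferred from the printed weight shapes, e.g. 363 = 11*11*3 and 1600 = 5*5*64; strides and padding chosen to match the printed output sizes rather than read from AlexNet.ndl), the plain-Python check below recomputes the 224 -> 56 -> 27 -> 13 -> 6 spatial progression and the 9216-wide input to h1.

    import math

    def conv_same(size, stride):
        # "same"-style convolution: output = ceil(input / stride)
        return math.ceil(size / stride)

    def pool_valid(size, kernel=3, stride=2):
        # unpadded max pooling
        return (size - kernel) // stride + 1

    w = conv_same(224, 4)      # conv1: 11x11x3 kernel, stride 4 -> 56
    w = pool_valid(w)          # pool1 -> 27
    w = conv_same(w, 1)        # conv2: 5x5x64 kernel -> 27
    w = pool_valid(w)          # pool2 -> 13
    w = conv_same(w, 1)        # conv3: 3x3x192 kernel -> 13
    w = conv_same(w, 1)        # conv4: 3x3x384 kernel -> 13
    w = conv_same(w, 1)        # conv5: 3x3x256 kernel -> 13
    w = pool_valid(w)          # pool3 -> 6
    assert w == 6 and w * w * 256 == 9216   # matches h1.W = [4096, 9216] above
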
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +17 out of 47 nodes do not share the minibatch layout with the input data. + + +Validating for node CE. 47 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +17 out of 47 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. + +SGD using GPU 0. + +Training criterion node(s): + CE = CrossEntropyWithSoftmax + +Evaluation criterion node(s): + Err = ErrorPrediction + + +Allocating matrices for forward and/or backward propagation. +No PreCompute nodes found, skipping PreCompute step +Set Max Temp Mem Size For Convolution Nodes to 0 samples. +Starting Epoch 1: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples + +Starting minibatch loop. + Epoch[ 1 of 3]-Minibatch[ 1- 100]: * 1600; ce = 7.42966797; err = 1.00000000; TotalTime = 31.2538s; SamplesPerSecond = 51.2 +Finished Epoch[ 1 of 3]: [Training] ce = 7.2399459; err = 0.99933308; learningRatePerSample = 0.00062499999; EpochTime=56.2334 +Starting Epoch 2: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples + +Starting minibatch loop. + Epoch[ 2 of 3]-Minibatch[ 1- 100, 100.00%]: * 1600; ce = 6.90161499; err = 0.99875000; TotalTime = 28.6861s; SamplesPerSecond = 55.8 +Finished Epoch[ 2 of 3]: [Training] ce = 6.9208598; err = 0.99899966; learningRatePerSample = 0.00062499999; EpochTime=53.6826 +Starting Epoch 3: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples + +Starting minibatch loop. 
+ Epoch[ 3 of 3]-Minibatch[ 1- 100, 100.00%]: * 1600; ce = 6.87744995; err = 0.99812500; TotalTime = 28.9379s; SamplesPerSecond = 55.3 +Finished Epoch[ 3 of 3]: [Training] ce = 6.8854518; err = 0.99799931; learningRatePerSample = 0.00062499999; EpochTime=53.9703 +CNTKCommandTrainEnd: Train +WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command. +WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command. +WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command. +WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command. +WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command. +WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command. +WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command. +WARNING: trying to use cuDNN on unsupported platform. It is safe to ignore the warning if it's produced during model editing command. + +Post-processing network... + +3 roots: + CE = CrossEntropyWithSoftmax + Err = ErrorPrediction + OutputNodes.z = Plus +FormNestedNetwork: WARNING: Was called twice for CE CrossEntropyWithSoftmax operation +FormNestedNetwork: WARNING: Was called twice for Err ErrorPrediction operation +FormNestedNetwork: WARNING: Was called twice for OutputNodes.z Plus operation + + +Validating for node CE. 47 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1]
+
+17 out of 47 nodes do not share the minibatch layout with the input data.
+
+
+Validating for node Err. 47 nodes to process in pass 1.
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1]
+
+17 out of 47 nodes do not share the minibatch layout with the input data.
+
+
+Validating for node OutputNodes.z. 45 nodes to process in pass 1.
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z. 28 nodes to process in pass 2. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z, final verification. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+
+16 out of 45 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+Post-processing network...
+
+4 roots:
+ CE = CrossEntropyWithSoftmax
+ errTop5 = ErrorPrediction
+ Err = ErrorPrediction
+ OutputNodes.z = Plus
+FormNestedNetwork: WARNING: Was called twice for CE CrossEntropyWithSoftmax operation
+FormNestedNetwork: WARNING: Was called twice for errTop5 ErrorPrediction operation
+FormNestedNetwork: WARNING: Was called twice for Err ErrorPrediction operation
+FormNestedNetwork: WARNING: Was called twice for OutputNodes.z Plus operation
+
+
+Validating for node CE. 47 nodes to process in pass 1.
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1]
+
+17 out of 47 nodes do not share the minibatch layout with the input data.
+
+
+Validating for node errTop5. 48 nodes to process in pass 1.
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +Validating for node errTop5. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +Validating for node errTop5, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1]
+Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1]
+
+18 out of 48 nodes do not share the minibatch layout with the input data.
+
+
+Validating for node Err. 47 nodes to process in pass 1.
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1]
+
+17 out of 47 nodes do not share the minibatch layout with the input data.
+
+
+Validating for node OutputNodes.z. 45 nodes to process in pass 1.
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z. 28 nodes to process in pass 2. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z, final verification. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+
+16 out of 45 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+Post-processing network...
+
+4 roots:
+ Err = ErrorPrediction
+ errTop5 = ErrorPrediction
+ OutputNodes.z = Plus
+ CE = CrossEntropyWithSoftmax
+FormNestedNetwork: WARNING: Was called twice for Err ErrorPrediction operation
+FormNestedNetwork: WARNING: Was called twice for errTop5 ErrorPrediction operation
+FormNestedNetwork: WARNING: Was called twice for OutputNodes.z Plus operation
+FormNestedNetwork: WARNING: Was called twice for CE CrossEntropyWithSoftmax operation
+
+
+Validating for node Err. 47 nodes to process in pass 1.
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node Err, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> Err = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +17 out of 47 nodes do not share the minibatch layout with the input data. + + +Validating for node errTop5. 48 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +Validating for node errTop5. 29 nodes to process in pass 2. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +Validating for node errTop5, final verification. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> unnamed125 = LearnableParameter -> [1 [1 {1}], 1] +Validating --> errTop5 = ErrorPrediction(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0], unnamed125[1, 1]) -> [1 [1 {1}], 1] + +18 out of 48 nodes do not share the minibatch layout with the input data. + + +Validating for node OutputNodes.z. 45 nodes to process in pass 1. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z. 28 nodes to process in pass 2. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +Validating for node OutputNodes.z, final verification. 
+ +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> 
[43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] + +16 out of 45 nodes do not share the minibatch layout with the input data. + + +Validating for node CE. 47 nodes to process in pass 1. 
+ +Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096] +Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096] +Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216] +Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304] +Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456] +Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728] +Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600] +Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363] +Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0] +Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1] +Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0] +Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0] +Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1] +Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0] +Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0] +Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1] +Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0] +Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 
[13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1] +Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0] +Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0] +Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1] +Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0] +Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0] +Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1] +Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0] +Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1] + +Validating for node CE. 29 nodes to process in pass 2. 
+
+Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096]
+Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096]
+Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216]
+Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304]
+Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456]
+Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728]
+Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600]
+Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363]
+Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0]
+Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0]
+Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1]
+Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0]
+Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0]
+Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0]
+Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0]
+Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1]
+Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0]
+Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0]
+Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0]
+Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0]
+Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1]
+Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0]
+Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0]
+Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1]
+
+Validating for node CE, final verification.
+
+Validating --> labels = InputValue -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.W = LearnableParameter -> [1000 [1000 {1}], 4096]
+Validating --> h2.W = LearnableParameter -> [4096 [4096 {1}], 4096]
+Validating --> h1.W = LearnableParameter -> [4096 [4096 {1}], 9216]
+Validating --> conv5.W = LearnableParameter -> [256 [256 {1}], 2304]
+Validating --> conv4.W = LearnableParameter -> [256 [256 {1}], 3456]
+Validating --> conv3.W = LearnableParameter -> [384 [384 {1}], 1728]
+Validating --> conv2.W = LearnableParameter -> [192 [192 {1}], 1600]
+Validating --> conv1.W = LearnableParameter -> [64 [64 {1}], 363]
+Validating --> features = InputValue -> [150528 [224 x 224 x 3 {1,224,50176}], MBSize 0]
+Validating --> conv1.c = Convolution(conv1.W[64, 363], features[150528 [224 x 224 x 3 {1,224,50176}] {W=224, H=3, C=224}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0]
+Validating --> conv1.b = LearnableParameter -> [64 [1 x 1 x 64 {1,1,1}], 1]
+Validating --> conv1.z = Plus(conv1.c[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0], conv1.b[64 [1 x 1 x 64 {1,1,1}], 1]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0]
+Validating --> conv1.y = RectifiedLinear(conv1.z[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [200704 [56 x 56 x 64 {1,56,3136}], MBSize 0]
+Validating --> pool1 = MaxPooling(conv1.y[200704 [56 x 56 x 64 {1,56,3136}] {W=56, H=64, C=56}, MBSize 0]) -> [46656 [27 x 27 x 64 {1,27,729}], MBSize 0]
+Validating --> conv2.c = Convolution(conv2.W[192, 1600], pool1[46656 [27 x 27 x 64 {1,27,729}] {W=27, H=64, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0]
+Validating --> conv2.b = LearnableParameter -> [192 [1 x 1 x 192 {1,1,1}], 1]
+Validating --> conv2.z = Plus(conv2.c[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0], conv2.b[192 [1 x 1 x 192 {1,1,1}], 1]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0]
+Validating --> conv2.y = RectifiedLinear(conv2.z[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [139968 [27 x 27 x 192 {1,27,729}], MBSize 0]
+Validating --> pool2 = MaxPooling(conv2.y[139968 [27 x 27 x 192 {1,27,729}] {W=27, H=192, C=27}, MBSize 0]) -> [32448 [13 x 13 x 192 {1,13,169}], MBSize 0]
+Validating --> conv3.c = Convolution(conv3.W[384, 1728], pool2[32448 [13 x 13 x 192 {1,13,169}] {W=13, H=192, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0]
+Validating --> conv3.b = LearnableParameter -> [384 [1 x 1 x 384 {1,1,1}], 1]
+Validating --> conv3.z = Plus(conv3.c[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0], conv3.b[384 [1 x 1 x 384 {1,1,1}], 1]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0]
+Validating --> conv3.y = RectifiedLinear(conv3.z[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [64896 [13 x 13 x 384 {1,13,169}], MBSize 0]
+Validating --> conv4.c = Convolution(conv4.W[256, 3456], conv3.y[64896 [13 x 13 x 384 {1,13,169}] {W=13, H=384, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv4.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv4.z = Plus(conv4.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv4.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv4.y = RectifiedLinear(conv4.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.c = Convolution(conv5.W[256, 2304], conv4.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.b = LearnableParameter -> [256 [1 x 1 x 256 {1,1,1}], 1]
+Validating --> conv5.z = Plus(conv5.c[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0], conv5.b[256 [1 x 1 x 256 {1,1,1}], 1]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> conv5.y = RectifiedLinear(conv5.z[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [43264 [13 x 13 x 256 {1,13,169}], MBSize 0]
+Validating --> pool3 = MaxPooling(conv5.y[43264 [13 x 13 x 256 {1,13,169}] {W=13, H=256, C=13}, MBSize 0]) -> [9216 [6 x 6 x 256 {1,6,36}], MBSize 0]
+Validating --> h1.t = Times(h1.W[4096, 9216], pool3[9216 [6 x 6 x 256 {1,6,36}] {W=6, H=256, C=6}, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h1.z = Plus(h1.t[4096, MBSize 0], h1.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1.y = RectifiedLinear(h1.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h1_d = Dropout(h1.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.t = Times(h2.W[4096, 4096], h1_d[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.b = LearnableParameter -> [4096 [4096 {1}], 1]
+Validating --> h2.z = Plus(h2.t[4096, MBSize 0], h2.b[4096, 1]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2.y = RectifiedLinear(h2.z[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> h2_d = Dropout(h2.y[4096, MBSize 0]) -> [4096 [4096 {1}], MBSize 0]
+Validating --> OutputNodes.t = Times(OutputNodes.W[1000, 4096], h2_d[4096, MBSize 0]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> OutputNodes.b = LearnableParameter -> [1000 [1000 {1}], 1]
+Validating --> OutputNodes.z = Plus(OutputNodes.t[1000, MBSize 0], OutputNodes.b[1000, 1]) -> [1000 [1000 {1}], MBSize 0]
+Validating --> CE = CrossEntropyWithSoftmax(labels[1000, MBSize 0], OutputNodes.z[1000, MBSize 0]) -> [1 [1 {1}], 1]
+
+17 out of 47 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.
+
+
+Allocating matrices for forward and/or backward propagation.
+CUDA error 11 [c:\tools\cub-1.4.1\cub\device\dispatch/dispatch_radix_sort.cuh, 796]: invalid argument
+CUDA error 11 [c:\tools\cub-1.4.1\cub\device\dispatch/dispatch_radix_sort.cuh, 796]: invalid argument
+CUDA error 11 [c:\tools\cub-1.4.1\cub\device\dispatch/dispatch_radix_sort.cuh, 796]: invalid argument
+Minibatch[1-32]: Samples Seen = 500 Err: ErrorPrediction/Sample = 1 errTop5: ErrorPrediction/Sample = 0.992 CE: CrossEntropyWithSoftmax/Sample = 6.9566009
+Final Results: Minibatch[1-32]: Samples Seen = 500 Err: ErrorPrediction/Sample = 1 errTop5: ErrorPrediction/Sample = 0.992 CE: CrossEntropyWithSoftmax/Sample = 6.9566009 perplexity = 1050.0582
+__COMPLETED__
diff --git a/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.windows.release.gpu.txt b/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.windows.release.gpu.txt
new file mode 100644
index 000000000..7b7f66d90
--- /dev/null
+++ b/Tests/EndToEndTests/Image/AlexNet/Composite/baseline.windows.release.gpu.txt
@@ -0,0 +1,851 @@
+Copying test data to local directory
+=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/AlexNet.cntk currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu DeviceId=0 timestamping=true
+-------------------------------------------------------------------
+Build info:
+
+ Built time: May 3 2016 13:23:06
+ Last modified date: Mon Apr 18 00:00:12 2016
+ Build type: Release
+ Build target: GPU
+ With 1bit-SGD: no
+ CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
+ CUB_PATH: C:\src\cub-1.4.1
+ CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
+ Build Branch: HEAD
+ Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
+ Built by svcphil on LIANA-09-w
+ Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
+-------------------------------------------------------------------
+Changed current directory to C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData
+05/03/2016 14:11:01: -------------------------------------------------------------------
+05/03/2016 14:11:01: Build info:
+
+05/03/2016 14:11:01: Built time: May 3 2016 13:23:06
+05/03/2016 14:11:01: Last modified date: Mon Apr 18 00:00:12 2016
+05/03/2016 14:11:01: Build type: Release
+05/03/2016 14:11:01: Build target: GPU
+05/03/2016 14:11:01: With 1bit-SGD: no
+05/03/2016 14:11:01: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
+05/03/2016 14:11:01: CUB_PATH: C:\src\cub-1.4.1
+05/03/2016 14:11:01: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
+05/03/2016 14:11:01: Build Branch: HEAD
+05/03/2016 14:11:01: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
+05/03/2016 14:11:01: Built by svcphil on LIANA-09-w
+05/03/2016 14:11:01: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
+05/03/2016 14:11:01: -------------------------------------------------------------------
+
+05/03/2016 14:11:01: Running on DPHAIM-25 at 2016/05/03 14:11:01
+05/03/2016 14:11:01: Command line:
+C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/AlexNet.cntk currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu DeviceId=0 timestamping=true
+
+
+
+05/03/2016 14:11:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
+05/03/2016 14:11:01: ModelDir = "$RunDir$/models"
+ndlMacros=$ConfigDir$/Macros.ndl
+precision=float
+deviceId=Auto
+command=Train:AddTop5Eval:Test
+parallelTrain=false
+traceLevel=1
+numMBsToShowResult=100
+Train=[
+    action=train
+    modelPath=$ModelDir$/AlexNet
+    NDLNetworkBuilder=[
+        networkDescription=$ConfigDir$/AlexNet.ndl
+    ]
+    SGD=[
+        epochSize=0
+        minibatchSize=16
+        learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003
+        momentumPerMB=0.9
+        maxEpochs=3
+        gradUpdateType=None
+        L2RegWeight=0.0005
+        dropoutRate=0*5:0.5
+        ParallelTrain=[
+            parallelizationMethod=DataParallelSGD
+            distributedMBReading=true
+            parallelizationStartEpoch=1
+            DataParallelSGD=[
+                gradientBits=1
+            ]
+        ]
+        numMBsToShowResult=100
+    ]
+    reader=[
+        readerType=ImageReader
+        file=$ConfigDir$/train_map.txt
+        randomize=Auto
+        features=[
+            width=224
+            height=224
+            channels=3
+            cropType=Random
+            cropRatio=0.875
+            jitterType=UniRatio
+            interpolations=Linear
+            meanFile=$ConfigDir$/ImageNet1K_mean.xml
+        ]
+        labels=[
+            labelDim=1000
+        ]
+    ]
+]
+AddTop5Eval=[
+    action=edit
+    CurModel=$ModelDir$/AlexNet
+    NewModel=$ModelDir$/AlexNet.Top5
+    editPath=$ConfigDir$/add_top5_layer.mel
+]
+Test=[
+    action=test
+    modelPath=$ModelDir$/AlexNet.Top5
+    minibatchSize=16
+    NDLNetworkBuilder=[
+        networkDescription=$ConfigDir$/AlexNet.ndl
+    ]
+    reader=[
+        readerType=ImageReader
+        file=$ConfigDir$/val_map.txt
+        randomize=None
+        features=[
+            width=224
+            height=224
+            channels=3
+            cropType=Center
+            meanFile=$ConfigDir$/ImageNet1K_mean.xml
+        ]
+        labels=[
+            labelDim=1000
+        ]
+    ]
+]
+currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu
+DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData
+ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet
+OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu
+DeviceId=0
+timestamping=true
+
+05/03/2016 14:11:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
+
+05/03/2016 14:11:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+05/03/2016 14:11:01: ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models"
+ndlMacros=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/Macros.ndl
+precision=float
+deviceId=Auto
+command=Train:AddTop5Eval:Test
+parallelTrain=false
+traceLevel=1
+numMBsToShowResult=100
+Train=[
+    action=train
+    modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet
+    NDLNetworkBuilder=[
+        networkDescription=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/AlexNet.ndl
+    ]
+    SGD=[
+        epochSize=0
+        minibatchSize=16
+        learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003
+        momentumPerMB=0.9
+        maxEpochs=3
+        gradUpdateType=None
+        L2RegWeight=0.0005
+        dropoutRate=0*5:0.5
+        ParallelTrain=[
+            parallelizationMethod=DataParallelSGD
+            distributedMBReading=true
+            parallelizationStartEpoch=1
+            DataParallelSGD=[
+                gradientBits=1
+            ]
+        ]
+        numMBsToShowResult=100
+    ]
+    reader=[
+        readerType=ImageReader
+        file=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/train_map.txt
+        randomize=Auto
+        features=[
+            width=224
+            height=224
+            channels=3
+            cropType=Random
+            cropRatio=0.875
+            jitterType=UniRatio
+            interpolations=Linear
+            meanFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/ImageNet1K_mean.xml
+        ]
+        labels=[
+            labelDim=1000
+        ]
+    ]
+]
+AddTop5Eval=[
+    action=edit
+    CurModel=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet
+    NewModel=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet.Top5
+    editPath=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/add_top5_layer.mel
+]
+Test=[
+    action=test
+    modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet.Top5
+    minibatchSize=16
+    NDLNetworkBuilder=[
+        networkDescription=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/AlexNet.ndl
+    ]
+    reader=[
+        readerType=ImageReader
+        file=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/val_map.txt
+        randomize=None
+        features=[
+            width=224
+            height=224
+            channels=3
+            cropType=Center
+            meanFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/ImageNet1K_mean.xml
+        ]
+        labels=[
+            labelDim=1000
+        ]
+    ]
+]
+currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu
+DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData
+ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet
+OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu
+DeviceId=0
+timestamping=true
+
+05/03/2016 14:11:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+
+05/03/2016 14:11:01: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+configparameters: AlexNet.cntk:AddTop5Eval=[
+    action=edit
+    CurModel=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet
+    NewModel=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet.Top5
+    editPath=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/add_top5_layer.mel
+] + +configparameters: AlexNet.cntk:command=Train:AddTop5Eval:Test +configparameters: AlexNet.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet +configparameters: AlexNet.cntk:currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData +configparameters: AlexNet.cntk:DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu\TestData +configparameters: AlexNet.cntk:deviceId=0 +configparameters: AlexNet.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models +configparameters: AlexNet.cntk:ndlMacros=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/Macros.ndl +configparameters: AlexNet.cntk:numMBsToShowResult=100 +configparameters: AlexNet.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu +configparameters: AlexNet.cntk:parallelTrain=false +configparameters: AlexNet.cntk:precision=float +configparameters: AlexNet.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu +configparameters: AlexNet.cntk:Test=[ + action=test + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet.Top5 + minibatchSize=16 + NDLNetworkBuilder=[ + networkDescription=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/AlexNet.ndl + ] + reader=[ + readerType=ImageReader + file=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/val_map.txt + randomize=None + features=[ + width=224 + height=224 + channels=3 + cropType=Center + meanFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] + +configparameters: AlexNet.cntk:timestamping=true +configparameters: AlexNet.cntk:traceLevel=1 +configparameters: AlexNet.cntk:Train=[ + action=train + modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet + NDLNetworkBuilder=[ + networkDescription=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/AlexNet.ndl + ] + SGD=[ + epochSize=0 + minibatchSize=16 + learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003 + momentumPerMB=0.9 + maxEpochs=3 + gradUpdateType=None + L2RegWeight=0.0005 + dropoutRate=0*5:0.5 + ParallelTrain=[ + parallelizationMethod=DataParallelSGD + distributedMBReading=true + parallelizationStartEpoch=1 + DataParallelSGD=[ + gradientBits=1 + ] + ] + numMBsToShowResult=100 + ] + reader=[ + readerType=ImageReader + file=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/train_map.txt + randomize=Auto + features=[ + width=224 + height=224 + channels=3 + cropType=Random + cropRatio=0.875 + jitterType=UniRatio + interpolations=Linear + meanFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Image\AlexNet/ImageNet1K_mean.xml + ] + labels=[ + labelDim=1000 + ] + ] +] + +05/03/2016 14:11:01: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< +05/03/2016 14:11:01: Commands: Train AddTop5Eval Test +05/03/2016 14:11:01: Precision = "float" +05/03/2016 14:11:01: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet +05/03/2016 14:11:01: CNTKCommandTrainInfo: Train : 3 
+05/03/2016 14:11:01: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 + +05/03/2016 14:11:01: ############################################################################## +05/03/2016 14:11:01: # # +05/03/2016 14:11:01: # Action "train" # +05/03/2016 14:11:01: # # +05/03/2016 14:11:01: ############################################################################## + +05/03/2016 14:11:01: CNTKCommandTrainBegin: Train +NDLBuilder Using GPU 0 + +05/03/2016 14:11:01: Creating virgin network. +Microsoft::MSR::CNTK::GPUMatrix::SetGaussianRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 + +Post-processing network... + +3 roots: + OutputNodes.z = Plus() + ce = CrossEntropyWithSoftmax() + err = ErrorPrediction() + +Validating network. 48 nodes to process in pass 1. + +Validating --> OutputNodes.W = LearnableParameter() : -> [1000 x 4096] +Validating --> h2.W = LearnableParameter() : -> [4096 x 4096] +Validating --> h1.W = LearnableParameter() : -> [4096 x 6 x 6 x 256] +Validating --> conv5.W = LearnableParameter() : -> [256 x 2304] +Validating --> conv4.W = LearnableParameter() : -> [256 x 3456] +Validating --> conv3.W = LearnableParameter() : -> [384 x 1728] +Validating --> conv2.W = LearnableParameter() : -> [192 x 1600] +Validating --> conv1.W = LearnableParameter() : -> [64 x 363] +Validating --> features = InputValue() : -> [224 x 224 x 3 x *] +Validating --> conv1.c = Convolution (conv1.W, features) : [64 x 363], [224 x 224 x 3 x *] -> [56 x 56 x 64 x *] +Validating --> conv1.b = LearnableParameter() : -> [1 x 1 x 64] +Validating --> conv1.z = Plus (conv1.c, conv1.b) : [56 x 56 x 64 x *], [1 x 1 x 64] -> [56 x 56 x 64 x *] +Validating --> conv1.y = RectifiedLinear (conv1.z) : [56 x 56 x 64 x *] -> [56 x 56 x 64 x *] +Validating --> pool1 = MaxPooling (conv1.y) : [56 x 56 x 64 x *] -> [27 x 27 x 64 x *] +Validating --> conv2.c = Convolution (conv2.W, pool1) : [192 x 1600], [27 x 27 x 64 x *] -> [27 x 27 x 192 x *] +Validating --> conv2.b = LearnableParameter() : -> [1 x 1 x 192] +Validating --> conv2.z = Plus (conv2.c, conv2.b) : [27 x 27 x 192 x *], [1 x 1 x 192] -> [27 x 27 x 192 x *] +Validating --> conv2.y = RectifiedLinear (conv2.z) : [27 x 27 x 192 x *] -> [27 x 27 x 192 x *] +Validating --> pool2 = MaxPooling (conv2.y) : [27 x 27 x 192 x *] -> [13 x 13 x 192 x *] +Validating --> conv3.c = Convolution (conv3.W, pool2) : [384 x 1728], [13 x 13 x 192 x *] -> [13 x 13 x 384 x *] +Validating --> conv3.b = LearnableParameter() : -> [1 x 1 x 384] +Validating --> conv3.z = Plus (conv3.c, conv3.b) : [13 x 13 x 384 x *], [1 x 1 x 384] -> [13 x 13 x 384 x *] +Validating --> conv3.y = RectifiedLinear (conv3.z) : [13 x 13 x 384 x *] -> [13 x 13 x 384 x *] +Validating --> conv4.c = Convolution (conv4.W, conv3.y) : [256 x 3456], [13 x 13 x 384 x *] -> [13 x 13 x 256 x *] +Validating --> conv4.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv4.z = Plus (conv4.c, conv4.b) : [13 x 13 x 256 x *], [1 x 1 x 256] -> [13 x 13 x 256 x *] +Validating --> conv4.y = RectifiedLinear (conv4.z) : [13 x 13 x 256 x *] -> [13 x 13 x 256 x *] +Validating --> conv5.c = Convolution (conv5.W, conv4.y) : [256 x 2304], [13 x 13 x 256 x *] -> [13 x 13 x 256 x *] +Validating --> conv5.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv5.z = Plus (conv5.c, conv5.b) : [13 x 13 x 256 x *], [1 x 1 x 256] -> [13 x 13 x 256 x *] +Validating --> conv5.y = RectifiedLinear (conv5.z) : [13 x 13 x 256 x *] -> [13 x 13 x 256 x *] +Validating --> pool3 = MaxPooling (conv5.y) : [13 x 
13 x 256 x *] -> [6 x 6 x 256 x *] +Validating --> h1.t = Times (h1.W, pool3) : [4096 x 6 x 6 x 256], [6 x 6 x 256 x *] -> [4096 x *] +Validating --> h1.b = LearnableParameter() : -> [4096] +Validating --> h1.z = Plus (h1.t, h1.b) : [4096 x *], [4096] -> [4096 x *] +Validating --> h1.y = RectifiedLinear (h1.z) : [4096 x *] -> [4096 x *] +Validating --> h1_d = Dropout (h1.y) : [4096 x *] -> [4096 x *] +Validating --> h2.t = Times (h2.W, h1_d) : [4096 x 4096], [4096 x *] -> [4096 x *] +Validating --> h2.b = LearnableParameter() : -> [4096] +Validating --> h2.z = Plus (h2.t, h2.b) : [4096 x *], [4096] -> [4096 x *] +Validating --> h2.y = RectifiedLinear (h2.z) : [4096 x *] -> [4096 x *] +Validating --> h2_d = Dropout (h2.y) : [4096 x *] -> [4096 x *] +Validating --> OutputNodes.t = Times (OutputNodes.W, h2_d) : [1000 x 4096], [4096 x *] -> [1000 x *] +Validating --> OutputNodes.b = LearnableParameter() : -> [1000] +Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [1000 x *], [1000] -> [1000 x *] +Validating --> labels = InputValue() : -> [1000 x *] +Validating --> ce = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [1000 x *], [1000 x *] -> [1] +Validating --> err = ErrorPrediction (labels, OutputNodes.z) : [1000 x *], [1000 x *] -> [1] + +Validating network. 30 nodes to process in pass 2. + + +Validating network, final pass. + + +Using cuDNN convolution engine for geometry: Input: 224 x 224 x 3, Output: 56 x 56 x 64, Kernel: 11 x 11 x 3, Map: 1 x 1 x 64, Stride: 4 x 4 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 56 x 56 x 64, Output: 27 x 27 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 27 x 27 x 64, Output: 27 x 27 x 192, Kernel: 5 x 5 x 64, Map: 1 x 1 x 192, Stride: 1 x 1 x 64, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 27 x 27 x 192, Output: 13 x 13 x 192, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 192, Output: 13 x 13 x 384, Kernel: 3 x 3 x 192, Map: 1 x 1 x 384, Stride: 1 x 1 x 192, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 384, Output: 13 x 13 x 256, Kernel: 3 x 3 x 384, Map: 1 x 1 x 256, Stride: 1 x 1 x 384, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 256, Output: 13 x 13 x 256, Kernel: 3 x 3 x 256, Map: 1 x 1 x 256, Stride: 1 x 1 x 256, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 256, Output: 6 x 6 x 256, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + + +18 out of 48 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. + +05/03/2016 14:11:02: Created model with 48 nodes on GPU 0. + +05/03/2016 14:11:02: Training criterion node(s): +05/03/2016 14:11:02: ce = CrossEntropyWithSoftmax + +05/03/2016 14:11:02: Evaluation criterion node(s): + +05/03/2016 14:11:02: err = ErrorPrediction + + +Allocating matrices for forward and/or backward propagation. 
+ +Memory Sharing Structure: + +0000000000000000: {[err Gradient[1]] [features Gradient[224 x 224 x 3 x *]] [labels Gradient[1000 x *]] } +000000E290039200: {[conv2.W Value[192 x 1600]] } +000000E290039340: {[conv1.W Value[64 x 363]] } +000000E290039480: {[conv1.b Value[1 x 1 x 64]] } +000000E290039520: {[conv2.b Value[1 x 1 x 192]] } +000000E29003A060: {[features Value[224 x 224 x 3 x *]] } +000000E29003A240: {[labels Value[1000 x *]] } +000000E2A80AE1D0: {[OutputNodes.b Value[1000]] } +000000E2A80AE270: {[conv3.W Value[384 x 1728]] } +000000E2A80AE310: {[h1.W Value[4096 x 6 x 6 x 256]] } +000000E2A80AE950: {[conv5.b Value[1 x 1 x 256]] } +000000E2A80AEC70: {[h1.b Value[4096]] } +000000E2A80AF350: {[h2.W Value[4096 x 4096]] } +000000E2A80AF530: {[conv3.b Value[1 x 1 x 384]] } +000000E2A80AF710: {[conv4.b Value[1 x 1 x 256]] } +000000E2A80AFA30: {[h2.b Value[4096]] } +000000E2A80AFDF0: {[conv5.W Value[256 x 2304]] } +000000E2A80AFE90: {[conv4.W Value[256 x 3456]] } +000000E2A80AFF30: {[OutputNodes.W Value[1000 x 4096]] } +000000E2AE0BA220: {[conv4.c Value[13 x 13 x 256 x *]] } +000000E2AE0BA2C0: {[h2.W Gradient[4096 x 4096]] [h2.z Value[4096 x *]] } +000000E2AE0BA360: {[conv5.c Gradient[13 x 13 x 256 x *]] [conv5.y Value[13 x 13 x 256 x *]] } +000000E2AE0BA400: {[OutputNodes.t Value[1000 x *]] [h2.b Gradient[4096]] [h2.y Gradient[4096 x *]] } +000000E2AE0BA720: {[err Value[1]] } +000000E2AE0BA7C0: {[conv3.b Gradient[1 x 1 x 384]] [conv3.y Gradient[13 x 13 x 384 x *]] [conv4.z Gradient[13 x 13 x 256 x *]] } +000000E2AE0BA860: {[conv1.c Gradient[56 x 56 x 64 x *]] [conv1.y Value[56 x 56 x 64 x *]] } +000000E2AE0BA900: {[conv1.b Gradient[1 x 1 x 64]] [conv1.y Gradient[56 x 56 x 64 x *]] } +000000E2AE0BA9A0: {[conv1.z Gradient[56 x 56 x 64 x *]] [pool1 Value[27 x 27 x 64 x *]] } +000000E2AE0BAA40: {[conv3.z Gradient[13 x 13 x 384 x *]] [pool2 Gradient[13 x 13 x 192 x *]] } +000000E2AE0BAAE0: {[conv5.W Gradient[256 x 2304]] [conv5.z Value[13 x 13 x 256 x *]] } +000000E2AE0BAB80: {[h1_d Value[4096 x *]] } +000000E2AE0BACC0: {[conv3.c Gradient[13 x 13 x 384 x *]] [conv3.y Value[13 x 13 x 384 x *]] } +000000E2AE0BAE00: {[conv3.c Value[13 x 13 x 384 x *]] } +000000E2AE0BAEA0: {[conv4.W Gradient[256 x 3456]] [conv4.z Value[13 x 13 x 256 x *]] } +000000E2AE0BAFE0: {[h2_d Value[4096 x *]] } +000000E2AE0BB080: {[conv4.c Gradient[13 x 13 x 256 x *]] [conv4.y Value[13 x 13 x 256 x *]] } +000000E2AE0BB120: {[h1.W Gradient[4096 x 6 x 6 x 256]] [h1.z Value[4096 x *]] } +000000E2AE0BB1C0: {[ce Gradient[1]] } +000000E2AE0BB260: {[OutputNodes.b Gradient[1000]] } +000000E2AE0BB3A0: {[conv2.W Gradient[192 x 1600]] [conv2.z Value[27 x 27 x 192 x *]] } +000000E2AE0BB4E0: {[conv1.W Gradient[64 x 363]] [conv1.z Value[56 x 56 x 64 x *]] } +000000E2AE0BB800: {[conv2.b Gradient[1 x 1 x 192]] [conv2.y Gradient[27 x 27 x 192 x *]] } +000000E2AE0BB940: {[h1.z Gradient[4096 x *]] [pool3 Gradient[6 x 6 x 256 x *]] } +000000E2AE0BB9E0: {[h1.b Gradient[4096]] [h1.y Gradient[4096 x *]] [h2.t Value[4096 x *]] } +000000E2AE0BBB20: {[OutputNodes.t Gradient[1000 x *]] } +000000E2AE0BBBC0: {[conv4.b Gradient[1 x 1 x 256]] [conv4.y Gradient[13 x 13 x 256 x *]] [conv5.z Gradient[13 x 13 x 256 x *]] [pool3 Value[6 x 6 x 256 x *]] } +000000E2AE0BBD00: {[ce Value[1]] } +000000E2AE0BBDA0: {[conv2.c Value[27 x 27 x 192 x *]] } +000000E2AE0BBE40: {[conv1.c Value[56 x 56 x 64 x *]] } +000000E2AE0BBF80: {[conv2.c Gradient[27 x 27 x 192 x *]] [conv2.y Value[27 x 27 x 192 x *]] } +000000E2AE0BC020: {[h2.t Gradient[4096 x *]] [h2.y Value[4096 x 
*]] } +000000E2AE0BC160: {[conv5.c Value[13 x 13 x 256 x *]] } +000000E2AE0BC200: {[conv2.z Gradient[27 x 27 x 192 x *]] [pool1 Gradient[27 x 27 x 64 x *]] [pool2 Value[13 x 13 x 192 x *]] } +000000E2AE0BC2A0: {[OutputNodes.z Value[1000 x *]] } +000000E2AE0BC340: {[h1_d Gradient[4096 x *]] [h2.z Gradient[4096 x *]] } +000000E2AE0BC480: {[OutputNodes.W Gradient[1000 x 4096]] [OutputNodes.z Gradient[1000 x *]] } +000000E2AE0BC520: {[h2_d Gradient[4096 x *]] } +000000E2AE0BC840: {[conv3.W Gradient[384 x 1728]] [conv3.z Value[13 x 13 x 384 x *]] } +000000E2AE0BC8E0: {[conv5.b Gradient[1 x 1 x 256]] [conv5.y Gradient[13 x 13 x 256 x *]] [h1.t Value[4096 x *]] } +000000E2AE0BC980: {[h1.t Gradient[4096 x *]] [h1.y Value[4096 x *]] } + +05/03/2016 14:11:02: No PreCompute nodes found, skipping PreCompute step. + +05/03/2016 14:11:05: Starting Epoch 1: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples + +05/03/2016 14:11:05: Starting minibatch loop. +05/03/2016 14:11:14: Epoch[ 1 of 3]-Minibatch[ 1- 100]: ce = 7.43287354 * 1600; err = 0.99937500 * 1600; time = 8.8275s; samplesPerSecond = 181.3 +05/03/2016 14:11:20: Finished Epoch[ 1 of 3]: [Training] ce = 7.24222462 * 2999; err = 0.99933311 * 2999; totalSamplesSeen = 2999; learningRatePerSample = 0.00062499999; epochTime=14.8733s +05/03/2016 14:11:24: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet.1' + +05/03/2016 14:11:27: Starting Epoch 2: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples + +05/03/2016 14:11:27: Starting minibatch loop. +05/03/2016 14:11:34: Epoch[ 2 of 3]-Minibatch[ 1- 100, 100.00%]: ce = 6.90465576 * 1600; err = 0.99937500 * 1600; time = 6.9523s; samplesPerSecond = 230.1 +05/03/2016 14:11:40: Finished Epoch[ 2 of 3]: [Training] ce = 6.91868774 * 2999; err = 0.99899967 * 2999; totalSamplesSeen = 5998; learningRatePerSample = 0.00062499999; epochTime=12.9929s +05/03/2016 14:11:43: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet.2' + +05/03/2016 14:11:46: Starting Epoch 3: learning rate per sample = 0.000625 effective momentum = 0.900000 momentum as time constant = 151.9 samples + +05/03/2016 14:11:46: Starting minibatch loop. +05/03/2016 14:11:53: Epoch[ 3 of 3]-Minibatch[ 1- 100, 100.00%]: ce = 6.87353699 * 1600; err = 0.99750000 * 1600; time = 7.0845s; samplesPerSecond = 225.8 +05/03/2016 14:11:59: Finished Epoch[ 3 of 3]: [Training] ce = 6.88654161 * 2999; err = 0.99799933 * 2999; totalSamplesSeen = 8997; learningRatePerSample = 0.00062499999; epochTime=13.0423s +05/03/2016 14:12:03: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503141032.133212\Image_AlexNet@release_gpu/models/AlexNet' +05/03/2016 14:12:06: CNTKCommandTrainEnd: Train + +05/03/2016 14:12:06: Action "train" complete. + + +05/03/2016 14:12:06: ############################################################################## +05/03/2016 14:12:06: # # +05/03/2016 14:12:06: # Action "edit" # +05/03/2016 14:12:06: # # +05/03/2016 14:12:06: ############################################################################## + + +Post-processing network... + +3 roots: + OutputNodes.z = Plus() + ce = CrossEntropyWithSoftmax() + err = ErrorPrediction() + +Validating network. 48 nodes to process in pass 1. 
+ +Validating --> OutputNodes.W = LearnableParameter() : -> [1000 x 4096] +Validating --> h2.W = LearnableParameter() : -> [4096 x 4096] +Validating --> h1.W = LearnableParameter() : -> [4096 x 6 x 6 x 256] +Validating --> conv5.W = LearnableParameter() : -> [256 x 2304] +Validating --> conv4.W = LearnableParameter() : -> [256 x 3456] +Validating --> conv3.W = LearnableParameter() : -> [384 x 1728] +Validating --> conv2.W = LearnableParameter() : -> [192 x 1600] +Validating --> conv1.W = LearnableParameter() : -> [64 x 363] +Validating --> features = InputValue() : -> [224 x 224 x 3 x *1] +Validating --> conv1.c = Convolution (conv1.W, features) : [64 x 363], [224 x 224 x 3 x *1] -> [56 x 56 x 64 x *1] +Validating --> conv1.b = LearnableParameter() : -> [1 x 1 x 64] +Validating --> conv1.z = Plus (conv1.c, conv1.b) : [56 x 56 x 64 x *1], [1 x 1 x 64] -> [56 x 56 x 64 x *1] +Validating --> conv1.y = RectifiedLinear (conv1.z) : [56 x 56 x 64 x *1] -> [56 x 56 x 64 x *1] +Validating --> pool1 = MaxPooling (conv1.y) : [56 x 56 x 64 x *1] -> [27 x 27 x 64 x *1] +Validating --> conv2.c = Convolution (conv2.W, pool1) : [192 x 1600], [27 x 27 x 64 x *1] -> [27 x 27 x 192 x *1] +Validating --> conv2.b = LearnableParameter() : -> [1 x 1 x 192] +Validating --> conv2.z = Plus (conv2.c, conv2.b) : [27 x 27 x 192 x *1], [1 x 1 x 192] -> [27 x 27 x 192 x *1] +Validating --> conv2.y = RectifiedLinear (conv2.z) : [27 x 27 x 192 x *1] -> [27 x 27 x 192 x *1] +Validating --> pool2 = MaxPooling (conv2.y) : [27 x 27 x 192 x *1] -> [13 x 13 x 192 x *1] +Validating --> conv3.c = Convolution (conv3.W, pool2) : [384 x 1728], [13 x 13 x 192 x *1] -> [13 x 13 x 384 x *1] +Validating --> conv3.b = LearnableParameter() : -> [1 x 1 x 384] +Validating --> conv3.z = Plus (conv3.c, conv3.b) : [13 x 13 x 384 x *1], [1 x 1 x 384] -> [13 x 13 x 384 x *1] +Validating --> conv3.y = RectifiedLinear (conv3.z) : [13 x 13 x 384 x *1] -> [13 x 13 x 384 x *1] +Validating --> conv4.c = Convolution (conv4.W, conv3.y) : [256 x 3456], [13 x 13 x 384 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv4.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv4.z = Plus (conv4.c, conv4.b) : [13 x 13 x 256 x *1], [1 x 1 x 256] -> [13 x 13 x 256 x *1] +Validating --> conv4.y = RectifiedLinear (conv4.z) : [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv5.c = Convolution (conv5.W, conv4.y) : [256 x 2304], [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv5.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv5.z = Plus (conv5.c, conv5.b) : [13 x 13 x 256 x *1], [1 x 1 x 256] -> [13 x 13 x 256 x *1] +Validating --> conv5.y = RectifiedLinear (conv5.z) : [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> pool3 = MaxPooling (conv5.y) : [13 x 13 x 256 x *1] -> [6 x 6 x 256 x *1] +Validating --> h1.t = Times (h1.W, pool3) : [4096 x 6 x 6 x 256], [6 x 6 x 256 x *1] -> [4096 x *1] +Validating --> h1.b = LearnableParameter() : -> [4096] +Validating --> h1.z = Plus (h1.t, h1.b) : [4096 x *1], [4096] -> [4096 x *1] +Validating --> h1.y = RectifiedLinear (h1.z) : [4096 x *1] -> [4096 x *1] +Validating --> h1_d = Dropout (h1.y) : [4096 x *1] -> [4096 x *1] +Validating --> h2.t = Times (h2.W, h1_d) : [4096 x 4096], [4096 x *1] -> [4096 x *1] +Validating --> h2.b = LearnableParameter() : -> [4096] +Validating --> h2.z = Plus (h2.t, h2.b) : [4096 x *1], [4096] -> [4096 x *1] +Validating --> h2.y = RectifiedLinear (h2.z) : [4096 x *1] -> [4096 x *1] +Validating --> h2_d = Dropout (h2.y) : [4096 x *1] 
-> [4096 x *1] +Validating --> OutputNodes.t = Times (OutputNodes.W, h2_d) : [1000 x 4096], [4096 x *1] -> [1000 x *1] +Validating --> OutputNodes.b = LearnableParameter() : -> [1000] +Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [1000 x *1], [1000] -> [1000 x *1] +Validating --> labels = InputValue() : -> [1000 x *1] +Validating --> ce = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [1000 x *1], [1000 x *1] -> [1] +Validating --> err = ErrorPrediction (labels, OutputNodes.z) : [1000 x *1], [1000 x *1] -> [1] + +Validating network. 30 nodes to process in pass 2. + + +Validating network, final pass. + + +Using GEMM convolution engine for geometry: Input: 224 x 224 x 3, Output: 56 x 56 x 64, Kernel: 11 x 11 x 3, Map: 1 x 1 x 64, Stride: 4 x 4 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 56 x 56 x 64, Output: 27 x 27 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 27 x 27 x 64, Output: 27 x 27 x 192, Kernel: 5 x 5 x 64, Map: 1 x 1 x 192, Stride: 1 x 1 x 64, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 27 x 27 x 192, Output: 13 x 13 x 192, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 13 x 13 x 192, Output: 13 x 13 x 384, Kernel: 3 x 3 x 192, Map: 1 x 1 x 384, Stride: 1 x 1 x 192, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 13 x 13 x 384, Output: 13 x 13 x 256, Kernel: 3 x 3 x 384, Map: 1 x 1 x 256, Stride: 1 x 1 x 384, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 13 x 13 x 256, Output: 13 x 13 x 256, Kernel: 3 x 3 x 256, Map: 1 x 1 x 256, Stride: 1 x 1 x 256, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using GEMM convolution engine for geometry: Input: 13 x 13 x 256, Output: 6 x 6 x 256, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + + +18 out of 48 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. + + +Post-processing network... + +4 roots: + OutputNodes.z = Plus() + ce = CrossEntropyWithSoftmax() + err = ErrorPrediction() + errTop5 = ErrorPrediction() + +Validating network. 50 nodes to process in pass 1. 
+ +Validating --> OutputNodes.W = LearnableParameter() : -> [1000 x 4096] +Validating --> h2.W = LearnableParameter() : -> [4096 x 4096] +Validating --> h1.W = LearnableParameter() : -> [4096 x 6 x 6 x 256] +Validating --> conv5.W = LearnableParameter() : -> [256 x 2304] +Validating --> conv4.W = LearnableParameter() : -> [256 x 3456] +Validating --> conv3.W = LearnableParameter() : -> [384 x 1728] +Validating --> conv2.W = LearnableParameter() : -> [192 x 1600] +Validating --> conv1.W = LearnableParameter() : -> [64 x 363] +Validating --> features = InputValue() : -> [224 x 224 x 3 x *1] +Validating --> conv1.c = Convolution (conv1.W, features) : [64 x 363], [224 x 224 x 3 x *1] -> [56 x 56 x 64 x *1] +Validating --> conv1.b = LearnableParameter() : -> [1 x 1 x 64] +Validating --> conv1.z = Plus (conv1.c, conv1.b) : [56 x 56 x 64 x *1], [1 x 1 x 64] -> [56 x 56 x 64 x *1] +Validating --> conv1.y = RectifiedLinear (conv1.z) : [56 x 56 x 64 x *1] -> [56 x 56 x 64 x *1] +Validating --> pool1 = MaxPooling (conv1.y) : [56 x 56 x 64 x *1] -> [27 x 27 x 64 x *1] +Validating --> conv2.c = Convolution (conv2.W, pool1) : [192 x 1600], [27 x 27 x 64 x *1] -> [27 x 27 x 192 x *1] +Validating --> conv2.b = LearnableParameter() : -> [1 x 1 x 192] +Validating --> conv2.z = Plus (conv2.c, conv2.b) : [27 x 27 x 192 x *1], [1 x 1 x 192] -> [27 x 27 x 192 x *1] +Validating --> conv2.y = RectifiedLinear (conv2.z) : [27 x 27 x 192 x *1] -> [27 x 27 x 192 x *1] +Validating --> pool2 = MaxPooling (conv2.y) : [27 x 27 x 192 x *1] -> [13 x 13 x 192 x *1] +Validating --> conv3.c = Convolution (conv3.W, pool2) : [384 x 1728], [13 x 13 x 192 x *1] -> [13 x 13 x 384 x *1] +Validating --> conv3.b = LearnableParameter() : -> [1 x 1 x 384] +Validating --> conv3.z = Plus (conv3.c, conv3.b) : [13 x 13 x 384 x *1], [1 x 1 x 384] -> [13 x 13 x 384 x *1] +Validating --> conv3.y = RectifiedLinear (conv3.z) : [13 x 13 x 384 x *1] -> [13 x 13 x 384 x *1] +Validating --> conv4.c = Convolution (conv4.W, conv3.y) : [256 x 3456], [13 x 13 x 384 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv4.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv4.z = Plus (conv4.c, conv4.b) : [13 x 13 x 256 x *1], [1 x 1 x 256] -> [13 x 13 x 256 x *1] +Validating --> conv4.y = RectifiedLinear (conv4.z) : [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv5.c = Convolution (conv5.W, conv4.y) : [256 x 2304], [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> conv5.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv5.z = Plus (conv5.c, conv5.b) : [13 x 13 x 256 x *1], [1 x 1 x 256] -> [13 x 13 x 256 x *1] +Validating --> conv5.y = RectifiedLinear (conv5.z) : [13 x 13 x 256 x *1] -> [13 x 13 x 256 x *1] +Validating --> pool3 = MaxPooling (conv5.y) : [13 x 13 x 256 x *1] -> [6 x 6 x 256 x *1] +Validating --> h1.t = Times (h1.W, pool3) : [4096 x 6 x 6 x 256], [6 x 6 x 256 x *1] -> [4096 x *1] +Validating --> h1.b = LearnableParameter() : -> [4096] +Validating --> h1.z = Plus (h1.t, h1.b) : [4096 x *1], [4096] -> [4096 x *1] +Validating --> h1.y = RectifiedLinear (h1.z) : [4096 x *1] -> [4096 x *1] +Validating --> h1_d = Dropout (h1.y) : [4096 x *1] -> [4096 x *1] +Validating --> h2.t = Times (h2.W, h1_d) : [4096 x 4096], [4096 x *1] -> [4096 x *1] +Validating --> h2.b = LearnableParameter() : -> [4096] +Validating --> h2.z = Plus (h2.t, h2.b) : [4096 x *1], [4096] -> [4096 x *1] +Validating --> h2.y = RectifiedLinear (h2.z) : [4096 x *1] -> [4096 x *1] +Validating --> h2_d = Dropout (h2.y) : [4096 x *1] 
-> [4096 x *1] +Validating --> OutputNodes.t = Times (OutputNodes.W, h2_d) : [1000 x 4096], [4096 x *1] -> [1000 x *1] +Validating --> OutputNodes.b = LearnableParameter() : -> [1000] +Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [1000 x *1], [1000] -> [1000 x *1] +Validating --> labels = InputValue() : -> [1000 x *1] +Validating --> ce = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [1000 x *1], [1000 x *1] -> [1] +Validating --> err = ErrorPrediction (labels, OutputNodes.z) : [1000 x *1], [1000 x *1] -> [1] +Validating --> unnamed137 = LearnableParameter() : -> [1 x 1] +Validating --> errTop5 = ErrorPrediction (labels, OutputNodes.z, unnamed137) : [1000 x *1], [1000 x *1], [1 x 1] -> [1] + +Validating network. 31 nodes to process in pass 2. + + +Validating network, final pass. + + + +20 out of 50 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. + + +05/03/2016 14:12:12: Action "edit" complete. + + +05/03/2016 14:12:12: ############################################################################## +05/03/2016 14:12:12: # # +05/03/2016 14:12:12: # Action "test" # +05/03/2016 14:12:12: # # +05/03/2016 14:12:12: ############################################################################## + + +Post-processing network... + +4 roots: + OutputNodes.z = Plus() + ce = CrossEntropyWithSoftmax() + err = ErrorPrediction() + errTop5 = ErrorPrediction() + +Validating network. 50 nodes to process in pass 1. + +Validating --> OutputNodes.W = LearnableParameter() : -> [1000 x 4096] +Validating --> h2.W = LearnableParameter() : -> [4096 x 4096] +Validating --> h1.W = LearnableParameter() : -> [4096 x 6 x 6 x 256] +Validating --> conv5.W = LearnableParameter() : -> [256 x 2304] +Validating --> conv4.W = LearnableParameter() : -> [256 x 3456] +Validating --> conv3.W = LearnableParameter() : -> [384 x 1728] +Validating --> conv2.W = LearnableParameter() : -> [192 x 1600] +Validating --> conv1.W = LearnableParameter() : -> [64 x 363] +Validating --> features = InputValue() : -> [224 x 224 x 3 x *2] +Validating --> conv1.c = Convolution (conv1.W, features) : [64 x 363], [224 x 224 x 3 x *2] -> [56 x 56 x 64 x *2] +Validating --> conv1.b = LearnableParameter() : -> [1 x 1 x 64] +Validating --> conv1.z = Plus (conv1.c, conv1.b) : [56 x 56 x 64 x *2], [1 x 1 x 64] -> [56 x 56 x 64 x *2] +Validating --> conv1.y = RectifiedLinear (conv1.z) : [56 x 56 x 64 x *2] -> [56 x 56 x 64 x *2] +Validating --> pool1 = MaxPooling (conv1.y) : [56 x 56 x 64 x *2] -> [27 x 27 x 64 x *2] +Validating --> conv2.c = Convolution (conv2.W, pool1) : [192 x 1600], [27 x 27 x 64 x *2] -> [27 x 27 x 192 x *2] +Validating --> conv2.b = LearnableParameter() : -> [1 x 1 x 192] +Validating --> conv2.z = Plus (conv2.c, conv2.b) : [27 x 27 x 192 x *2], [1 x 1 x 192] -> [27 x 27 x 192 x *2] +Validating --> conv2.y = RectifiedLinear (conv2.z) : [27 x 27 x 192 x *2] -> [27 x 27 x 192 x *2] +Validating --> pool2 = MaxPooling (conv2.y) : [27 x 27 x 192 x *2] -> [13 x 13 x 192 x *2] +Validating --> conv3.c = Convolution (conv3.W, pool2) : [384 x 1728], [13 x 13 x 192 x *2] -> [13 x 13 x 384 x *2] +Validating --> conv3.b = LearnableParameter() : -> [1 x 1 x 384] +Validating --> conv3.z = Plus (conv3.c, conv3.b) : [13 x 13 x 384 x *2], [1 x 1 x 384] -> [13 x 13 x 384 x *2] +Validating --> conv3.y = RectifiedLinear (conv3.z) : [13 x 13 x 384 x *2] -> [13 x 13 x 384 x *2] +Validating --> conv4.c = Convolution (conv4.W, conv3.y) : [256 x 3456], [13 x 13 x 384 x *2] -> [13 x 13 x 
256 x *2] +Validating --> conv4.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv4.z = Plus (conv4.c, conv4.b) : [13 x 13 x 256 x *2], [1 x 1 x 256] -> [13 x 13 x 256 x *2] +Validating --> conv4.y = RectifiedLinear (conv4.z) : [13 x 13 x 256 x *2] -> [13 x 13 x 256 x *2] +Validating --> conv5.c = Convolution (conv5.W, conv4.y) : [256 x 2304], [13 x 13 x 256 x *2] -> [13 x 13 x 256 x *2] +Validating --> conv5.b = LearnableParameter() : -> [1 x 1 x 256] +Validating --> conv5.z = Plus (conv5.c, conv5.b) : [13 x 13 x 256 x *2], [1 x 1 x 256] -> [13 x 13 x 256 x *2] +Validating --> conv5.y = RectifiedLinear (conv5.z) : [13 x 13 x 256 x *2] -> [13 x 13 x 256 x *2] +Validating --> pool3 = MaxPooling (conv5.y) : [13 x 13 x 256 x *2] -> [6 x 6 x 256 x *2] +Validating --> h1.t = Times (h1.W, pool3) : [4096 x 6 x 6 x 256], [6 x 6 x 256 x *2] -> [4096 x *2] +Validating --> h1.b = LearnableParameter() : -> [4096] +Validating --> h1.z = Plus (h1.t, h1.b) : [4096 x *2], [4096] -> [4096 x *2] +Validating --> h1.y = RectifiedLinear (h1.z) : [4096 x *2] -> [4096 x *2] +Validating --> h1_d = Dropout (h1.y) : [4096 x *2] -> [4096 x *2] +Validating --> h2.t = Times (h2.W, h1_d) : [4096 x 4096], [4096 x *2] -> [4096 x *2] +Validating --> h2.b = LearnableParameter() : -> [4096] +Validating --> h2.z = Plus (h2.t, h2.b) : [4096 x *2], [4096] -> [4096 x *2] +Validating --> h2.y = RectifiedLinear (h2.z) : [4096 x *2] -> [4096 x *2] +Validating --> h2_d = Dropout (h2.y) : [4096 x *2] -> [4096 x *2] +Validating --> OutputNodes.t = Times (OutputNodes.W, h2_d) : [1000 x 4096], [4096 x *2] -> [1000 x *2] +Validating --> OutputNodes.b = LearnableParameter() : -> [1000] +Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [1000 x *2], [1000] -> [1000 x *2] +Validating --> labels = InputValue() : -> [1000 x *2] +Validating --> ce = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [1000 x *2], [1000 x *2] -> [1] +Validating --> err = ErrorPrediction (labels, OutputNodes.z) : [1000 x *2], [1000 x *2] -> [1] +Validating --> unnamed137 = LearnableParameter() : -> [1 x 1] +Validating --> errTop5 = ErrorPrediction (labels, OutputNodes.z, unnamed137) : [1000 x *2], [1000 x *2], [1 x 1] -> [1] + +Validating network. 31 nodes to process in pass 2. + + +Validating network, final pass. + + +Using cuDNN convolution engine for geometry: Input: 224 x 224 x 3, Output: 56 x 56 x 64, Kernel: 11 x 11 x 3, Map: 1 x 1 x 64, Stride: 4 x 4 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 56 x 56 x 64, Output: 27 x 27 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 27 x 27 x 64, Output: 27 x 27 x 192, Kernel: 5 x 5 x 64, Map: 1 x 1 x 192, Stride: 1 x 1 x 64, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 27 x 27 x 192, Output: 13 x 13 x 192, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 192, Output: 13 x 13 x 384, Kernel: 3 x 3 x 192, Map: 1 x 1 x 384, Stride: 1 x 1 x 192, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 384, Output: 13 x 13 x 256, Kernel: 3 x 3 x 384, Map: 1 x 1 x 256, Stride: 1 x 1 x 384, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. 
+ +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 256, Output: 13 x 13 x 256, Kernel: 3 x 3 x 256, Map: 1 x 1 x 256, Stride: 1 x 1 x 256, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. + +Using cuDNN convolution engine for geometry: Input: 13 x 13 x 256, Output: 6 x 6 x 256, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. + + +20 out of 50 nodes do not share the minibatch layout with the input data. + +Post-processing network complete. + +evalNodeNames are not specified, using all the default evalnodes and training criterion nodes. + + +Allocating matrices for forward and/or backward propagation. + +Memory Sharing Structure: + +0000000000000000: {[OutputNodes.W Gradient[1000 x 4096]] [OutputNodes.b Gradient[1000]] [OutputNodes.t Gradient[1000 x *2]] [OutputNodes.z Gradient[1000 x *2]] [ce Gradient[1]] [conv1.W Gradient[64 x 363]] [conv1.b Gradient[1 x 1 x 64]] [conv1.c Gradient[56 x 56 x 64 x *2]] [conv1.y Gradient[56 x 56 x 64 x *2]] [conv1.z Gradient[56 x 56 x 64 x *2]] [conv2.W Gradient[192 x 1600]] [conv2.b Gradient[1 x 1 x 192]] [conv2.c Gradient[27 x 27 x 192 x *2]] [conv2.y Gradient[27 x 27 x 192 x *2]] [conv2.z Gradient[27 x 27 x 192 x *2]] [conv3.W Gradient[384 x 1728]] [conv3.b Gradient[1 x 1 x 384]] [conv3.c Gradient[13 x 13 x 384 x *2]] [conv3.y Gradient[13 x 13 x 384 x *2]] [conv3.z Gradient[13 x 13 x 384 x *2]] [conv4.W Gradient[256 x 3456]] [conv4.b Gradient[1 x 1 x 256]] [conv4.c Gradient[13 x 13 x 256 x *2]] [conv4.y Gradient[13 x 13 x 256 x *2]] [conv4.z Gradient[13 x 13 x 256 x *2]] [conv5.W Gradient[256 x 2304]] [conv5.b Gradient[1 x 1 x 256]] [conv5.c Gradient[13 x 13 x 256 x *2]] [conv5.y Gradient[13 x 13 x 256 x *2]] [conv5.z Gradient[13 x 13 x 256 x *2]] [err Gradient[1]] [errTop5 Gradient[1]] [features Gradient[224 x 224 x 3 x *2]] [h1.W Gradient[4096 x 6 x 6 x 256]] [h1.b Gradient[4096]] [h1.t Gradient[4096 x *2]] [h1.y Gradient[4096 x *2]] [h1.z Gradient[4096 x *2]] [h1_d Gradient[4096 x *2]] [h2.W Gradient[4096 x 4096]] [h2.b Gradient[4096]] [h2.t Gradient[4096 x *2]] [h2.y Gradient[4096 x *2]] [h2.z Gradient[4096 x *2]] [h2_d Gradient[4096 x *2]] [labels Gradient[1000 x *2]] [pool1 Gradient[27 x 27 x 64 x *2]] [pool2 Gradient[13 x 13 x 192 x *2]] [pool3 Gradient[6 x 6 x 256 x *2]] [unnamed137 Gradient[1 x 1]] } +000000E28E168F70: {[conv3.W Value[384 x 1728]] } +000000E28E1691F0: {[conv5.W Value[256 x 2304]] } +000000E28E1693D0: {[conv4.b Value[1 x 1 x 256]] } +000000E28E169510: {[conv4.W Value[256 x 3456]] } +000000E28E169830: {[conv5.b Value[1 x 1 x 256]] } +000000E28E1698D0: {[conv3.b Value[1 x 1 x 384]] } +000000E36C778260: {[OutputNodes.b Value[1000]] } +000000E36C7783A0: {[OutputNodes.W Value[1000 x 4096]] } +000000E36C778440: {[labels Value[1000 x *2]] } +000000E36C7786C0: {[features Value[224 x 224 x 3 x *2]] } +000000E36C7788A0: {[h1.b Value[4096]] } +000000E36C7789E0: {[h2.b Value[4096]] } +000000E36C778B20: {[h2.W Value[4096 x 4096]] } +000000E36C778DA0: {[h1.W Value[4096 x 6 x 6 x 256]] } +000000E370969220: {[conv5.y Value[13 x 13 x 256 x *2]] } +000000E370969360: {[h1.t Value[4096 x *2]] } +000000E3709694A0: {[conv4.z Value[13 x 13 x 256 x *2]] } +000000E370969540: {[conv4.c Value[13 x 13 x 256 x *2]] } +000000E370969680: {[conv4.y Value[13 x 13 x 256 x *2]] } +000000E370969720: {[conv5.z Value[13 x 13 x 256 x *2]] } +000000E3709697C0: {[h1.z Value[4096 x *2]] } +000000E370969860: {[h1_d Value[4096 x *2]] } +000000E3709699A0: {[h2.t Value[4096 x *2]] } +000000E370969A40: 
{[h2.z Value[4096 x *2]] } +000000E370969AE0: {[h2.y Value[4096 x *2]] } +000000E370969B80: {[h2_d Value[4096 x *2]] } +000000E370969C20: {[conv3.y Value[13 x 13 x 384 x *2]] } +000000E370969CC0: {[conv5.c Value[13 x 13 x 256 x *2]] } +000000E370969D60: {[h1.y Value[4096 x *2]] } +000000E370969EA0: {[OutputNodes.t Value[1000 x *2]] } +000000E370969F40: {[pool3 Value[6 x 6 x 256 x *2]] } +000000E37096A080: {[OutputNodes.z Value[1000 x *2]] } +000000E3728E02A0: {[conv2.y Value[27 x 27 x 192 x *2]] } +000000E3728E0340: {[conv1.c Value[56 x 56 x 64 x *2]] } +000000E3728E03E0: {[err Value[1]] } +000000E3728E0480: {[conv1.z Value[56 x 56 x 64 x *2]] } +000000E3728E0700: {[pool2 Value[13 x 13 x 192 x *2]] } +000000E3728E07A0: {[conv3.c Value[13 x 13 x 384 x *2]] } +000000E3728E0980: {[errTop5 Value[1]] } +000000E3728E0A20: {[conv3.z Value[13 x 13 x 384 x *2]] } +000000E3728E0AC0: {[ce Value[1]] } +000000E3728E0CA0: {[unnamed137 Value[1 x 1]] } +000000E3728E0DE0: {[conv1.y Value[56 x 56 x 64 x *2]] } +000000E3728E0E80: {[pool1 Value[27 x 27 x 64 x *2]] } +000000E3728E0F20: {[conv2.c Value[27 x 27 x 192 x *2]] } +000000E3728E1100: {[conv2.z Value[27 x 27 x 192 x *2]] } +000000E372D9CB80: {[conv2.b Value[1 x 1 x 192]] } +000000E372D9CE00: {[conv1.W Value[64 x 363]] } +000000E372D9CFE0: {[conv2.W Value[192 x 1600]] } +000000E372D9D120: {[conv1.b Value[1 x 1 x 64]] } + +05/03/2016 14:12:19: Final Results: Minibatch[1-32]: err = 0.99800000 * 500; errTop5 = 0.99600000 * 500; ce = 6.94932878 * 500; perplexity = 1042.44978531 + +05/03/2016 14:12:19: Action "test" complete. + +05/03/2016 14:12:19: __COMPLETED__ \ No newline at end of file diff --git a/Tests/EndToEndTests/Image/AlexNet/Composite/run-test b/Tests/EndToEndTests/Image/AlexNet/Composite/run-test new file mode 100644 index 000000000..d5105fc77 --- /dev/null +++ b/Tests/EndToEndTests/Image/AlexNet/Composite/run-test @@ -0,0 +1,47 @@ +#!/bin/bash + +. $TEST_ROOT_DIR/run-test-common + +# This test uses a large dataset which is not part of the CNTK repository itself +# We use the dataset from an external location specified using an environment variable +if [[ "$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY" == "" || ! -d "$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY" ]]; then + echo 'This test uses external data that is not part of the CNTK repository. Environment variable CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY must be set to point to the external test data location' + exit 1 +fi + +if [ "$OS" == "Windows_NT" ]; then + DataSourceDir=`cygpath -au $CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY`/private/Image/ResNet/Data/v0 +else + DataSourceDir=$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY/private/Image/ResNet/Data/v0 +fi + +# Copy the test data to the test run directory +echo 'Copying test data to local directory' +DataDir=$TEST_RUN_DIR/TestData +mkdir $DataDir +cp -R $DataSourceDir/* $DataDir || exit $? + +OriginalTestDir=.. + +(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) +if [ $? != 0 ]; then + echo Error: Baselines must match original test. + exit 1 +fi + +ConfigDir=$TEST_DIR/.. + +# Set the required parameters for the configuration +if [ "$OS" == "Windows_NT" ]; then + ConfigDir=$(cygpath -aw $ConfigDir) +fi + +# cntkrun +cntkrun AlexNetCommon.cntk "configFile=$ConfigDir/AlexNetComposite.cntk" +ExitCode=$? 
+ +# Delete the test data +rm -rf $DataDir + +exit $ExitCode + diff --git a/Tests/EndToEndTests/Image/AlexNet/Composite/testcases.yml b/Tests/EndToEndTests/Image/AlexNet/Composite/testcases.yml new file mode 100644 index 000000000..0978cdb45 --- /dev/null +++ b/Tests/EndToEndTests/Image/AlexNet/Composite/testcases.yml @@ -0,0 +1,31 @@ +dataDir: ../Data +tags: + # running on every BVT job in 'I' (Image) leg: + - bvt-i (build_sku == 'gpu') and (device=='gpu') and (flavor=='release') + # running every Nightly job in 'I' leg + - nightly-i (build_sku == 'gpu') and (device=='gpu') and (flavor=='release') + +testCases: + CNTK Run must be completed: + patterns: + - __COMPLETED__ + + Must train epochs in exactly same order and parameters: + patterns: + - Starting Epoch {{integer}} + - learning rate per sample = {{float}} + - momentum = {{float}} + + Epochs must be finished with expected results: + patterns: + - Finished Epoch[{{integer}} of {{integer}}] + - ce = {{float,tolerance=2.0%}} + - err = {{float,tolerance=2.0%}} + - learningRatePerSample = {{float,tolerance=0.001%}} + + Per-minibatch training results must match: + patterns: + - Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}} + - " * {{integer}}; " + - ce = {{float,tolerance=2.0%}} + diff --git a/Tests/EndToEndTests/Image/AlexNet/run-test b/Tests/EndToEndTests/Image/AlexNet/run-test index 232541773..1a73c8697 100755 --- a/Tests/EndToEndTests/Image/AlexNet/run-test +++ b/Tests/EndToEndTests/Image/AlexNet/run-test @@ -21,8 +21,12 @@ DataDir=$TEST_RUN_DIR/TestData mkdir $DataDir cp -R $DataSourceDir/* $DataDir || exit $? +if [ "$OS" == "Windows_NT" ]; then + ConfigDir=$(cygpath -aw $ConfigDir) +fi + # cntkrun -cntkrun AlexNet.cntk +cntkrun AlexNetCommon.cntk "configFile=$ConfigDir/AlexNet.cntk" ExitCode=$? 
# Delete the test data From f26c2be3d0ee16c776f3d9796e84abe52b155c8c Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Fri, 13 May 2016 12:27:34 +0200 Subject: [PATCH 45/51] Fixing thread number --- Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk | 1 - 1 file changed, 1 deletion(-) diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk index 7c896320d..32c6e3664 100644 --- a/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNet.cntk @@ -1,7 +1,6 @@ Train=[ reader=[ readerType=ImageReader - numCPUThreads=1 # Map file which maps images to labels using the following format: # # Example: From 2fce793fc2729c9f33b24a24030cc324af7812c6 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Fri, 13 May 2016 12:37:29 +0200 Subject: [PATCH 46/51] Changing data directory --- Tests/EndToEndTests/Image/AlexNet/Composite/testcases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/EndToEndTests/Image/AlexNet/Composite/testcases.yml b/Tests/EndToEndTests/Image/AlexNet/Composite/testcases.yml index 0978cdb45..62ac11f2a 100644 --- a/Tests/EndToEndTests/Image/AlexNet/Composite/testcases.yml +++ b/Tests/EndToEndTests/Image/AlexNet/Composite/testcases.yml @@ -1,4 +1,4 @@ -dataDir: ../Data +dataDir: ../../Data tags: # running on every BVT job in 'I' (Image) leg: - bvt-i (build_sku == 'gpu') and (device=='gpu') and (flavor=='release') From 72de9b5a08dbccd67d53f8a3639c496b57068755 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Fri, 13 May 2016 12:49:39 +0200 Subject: [PATCH 47/51] Making error message more verbose --- Tests/EndToEndTests/Image/AlexNet/Composite/run-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/EndToEndTests/Image/AlexNet/Composite/run-test b/Tests/EndToEndTests/Image/AlexNet/Composite/run-test index d5105fc77..11ea03910 100644 --- a/Tests/EndToEndTests/Image/AlexNet/Composite/run-test +++ b/Tests/EndToEndTests/Image/AlexNet/Composite/run-test @@ -25,7 +25,7 @@ OriginalTestDir=.. (cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) if [ $? != 0 ]; then - echo Error: Baselines must match original test. + echo Error: Baselines copied from $OriginalTestDir must match the baselines of the original test. exit 1 fi From 5fa7db3975cacd2ff0c17e6bb506c37cf0ea66b2 Mon Sep 17 00:00:00 2001 From: Eldar Akchurin Date: Fri, 13 May 2016 13:56:53 +0200 Subject: [PATCH 48/51] Adding more comments --- Tests/EndToEndTests/Image/AlexNet/AlexNetCommon.cntk | 1 + 1 file changed, 1 insertion(+) diff --git a/Tests/EndToEndTests/Image/AlexNet/AlexNetCommon.cntk b/Tests/EndToEndTests/Image/AlexNet/AlexNetCommon.cntk index ced92faf7..ccbad0576 100644 --- a/Tests/EndToEndTests/Image/AlexNet/AlexNetCommon.cntk +++ b/Tests/EndToEndTests/Image/AlexNet/AlexNetCommon.cntk @@ -1,3 +1,4 @@ +# Note: reader configuration comes from AlexNet.cntk or AlexNetComposite.cntk, depending on the test ModelDir = "$RunDir$/models" ndlMacros=$ConfigDir$/Macros.ndl From ba5c5927454437002138e51c729a0a71964c4d9f Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Fri, 13 May 2016 11:25:13 -0700 Subject: [PATCH 49/51] Updated ResNet readme, added results and model links. 
--- .../Miscellaneous/ImageNet/ResNet/README.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Examples/Image/Miscellaneous/ImageNet/ResNet/README.md b/Examples/Image/Miscellaneous/ImageNet/ResNet/README.md index eadbaaa8f..ad36801ae 100644 --- a/Examples/Image/Miscellaneous/ImageNet/ResNet/README.md +++ b/Examples/Image/Miscellaneous/ImageNet/ResNet/README.md @@ -1,12 +1,22 @@ # CNTK example: ImageNet ResNet -**Disclaimer: network configurations and experiment settings in this this folder try to follow those published in the [ResNet paper](http://arxiv.org/abs/1512.03385) as close as possible. However, these samples are NOT endorsed or verified by the researchers who published the original work. It is NOT guaranteed that you get the same (or even close) results as those in the paper.** - ## Overview |Data: |The ILSVRC2012 dataset (http://www.image-net.org/challenges/LSVRC/2012/) of images. |:---------|:--- |Purpose |This example demonstrates usage of the NDL (Network Description Language) to define networks similar to ResNet. -|Network |NDLNetworkBuilder, deep convolutional networks resembling ResNet networks. +|Network |NDLNetworkBuilder, deep convolutional residual networks (ResNet). |Training |Stochastic gradient descent with momentum. +## Details +The network configurations and experiment settings in this folder resemble the ones in the original [ResNet paper](http://arxiv.org/abs/1512.03385) with a few minor changes inspired by [this work](https://github.com/facebook/fb.resnet.torch). +The following table contains results as well as links to pre-trained models that can be used in various applications. + +| Network | Top-1 error | Top-5 error | Model +| ------------- | ----------- | ----------- | ---------- +| ResNet-18 | 29.57 | 10.41 | [Download](https://www.cntk.ai/resnet/ResNet_18.model) +| ResNet-34 | 27.31 | 8.97 | [Download](https://www.cntk.ai/resnet/ResNet_34.model) +| ResNet-50 | 24.74 | 7.56 | [Download](https://www.cntk.ai/resnet/ResNet_50.model) + +## Notes +This work is an implementation of ResNets in CNTK. If you are interested in the original implementation of ResNet, follow [this link](https://github.com/KaimingHe/deep-residual-networks). \ No newline at end of file From 03d675272249ddf87e855fa9089dc1523999f75d Mon Sep 17 00:00:00 2001 From: thhoens Date: Fri, 13 May 2016 14:34:19 -0700 Subject: [PATCH 50/51] Changed NOT_IMPLEMENTED to LogicError to improve error messages.
--- Source/Math/Matrix.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Source/Math/Matrix.cpp b/Source/Math/Matrix.cpp index b73f94659..c36a1a1f5 100644 --- a/Source/Math/Matrix.cpp +++ b/Source/Math/Matrix.cpp @@ -1223,32 +1223,32 @@ void Matrix::AssignValuesOf(const Matrix& deepCopyFrom) DISPATCH_MATRIX_ON_FLAG(this, this, { // Set CPUMatrix from: - DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom, + DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr, { m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); }, - NOT_IMPLEMENTED,//{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); }, - NOT_IMPLEMENTED,//{ m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); }, - NOT_IMPLEMENTED);//{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); }); + { LogicError("AssignValuesOf: Assigning a GPUMatrix to a CPUMatrix is not yet implemented."); },//{ m_CPUMatrix->SetValue(deepCopyFrom.m_GPUMatrix->GetNumRows(), deepCopyFrom.m_GPUMatrix->GetNumCols(), deepCopyFrom.m_GPUMatrix->CopyToArray()); },// //{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); }, + { LogicError("AssignValuesOf: Assigning a CPUSparseMatrix to a CPUMatrix is not yet implemented."); },//{ m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); }, + { LogicError("AssignValuesOf: Assigning a GPUSparseMatrix to a CPUMatrix is not yet implemented."); });//{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); }); }, { // Set GPUMatrix from: - DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom, - NOT_IMPLEMENTED,//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); }, + DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr, + { LogicError("AssignValuesOf: Assigning a GPUMatrix to a CPUMatrix is not yet implemented."); },//{ m_GPUMatrix->SetValue(deepCopyFrom.m_CPUMatrix->GetNumRows(), deepCopyFrom.m_CPUMatrix->GetNumCols(), m_GPUMatrix->GetComputeDeviceId(), deepCopyFrom.m_CPUMatrix->Data()); },////{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); }, { m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); }, - NOT_IMPLEMENTED,//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); }, - NOT_IMPLEMENTED);//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); }); + { LogicError("AssignValuesOf: Assigning a CPUSparseMatrix to a GPUMatrix is not yet implemented."); },//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); }, + { LogicError("AssignValuesOf: Assigning a GPUSparseMatrix to a GPUMatrix is not yet implemented."); });//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); }); }, { // Set CPUSparseMatrix from: - DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom, - NOT_IMPLEMENTED,//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); }, - NOT_IMPLEMENTED,//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); }, + DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr, + { LogicError("AssignValuesOf: Assigning a CPUMatrix to a CPUSparseMatrix is not yet implemented."); },//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); }, + { LogicError("AssignValuesOf: Assigning a GPUMatrix to a CPUSparseMatrix is not yet implemented."); },//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); }, { m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); }, - NOT_IMPLEMENTED);//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); }); + { LogicError("AssignValuesOf: Assigning a GPUSparseMatrix to a CPUSparseMatrix is not yet implemented."); });//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); }); }, { // Set GPUSparseMatrix from: 
- DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom, - NOT_IMPLEMENTED,//{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); }, + DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr, + { LogicError("AssignValuesOf: Assigning a CPUMatrix to a GPUSparseMatrix is not yet implemented."); },//{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); }, { m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); }, { m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); }, { m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); }); From 852e3d6280825d0f4971749fb7ccd0a124e50200 Mon Sep 17 00:00:00 2001 From: thhoens Date: Fri, 13 May 2016 14:34:46 -0700 Subject: [PATCH 51/51] WhereNode and PackedIndexNode cannot share values because they move them to the CPU. --- Source/ComputationNetworkLib/ReshapingNodes.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/ComputationNetworkLib/ReshapingNodes.h b/Source/ComputationNetworkLib/ReshapingNodes.h index f71ec737f..72858e34d 100644 --- a/Source/ComputationNetworkLib/ReshapingNodes.h +++ b/Source/ComputationNetworkLib/ReshapingNodes.h @@ -656,6 +656,7 @@ public: WhereNode(DEVICEID_TYPE deviceId, const wstring& name) : Base(deviceId, name) { + MarkValueNonSharable(); } virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override; @@ -696,6 +697,7 @@ public: PackedIndexNode(DEVICEID_TYPE deviceId, const wstring& name) : Base(deviceId, name) { + MarkValueNonSharable(); } virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override;