Merged with master

2016-05-24 11:01:03 +02:00 · 2016-05-24 11:01:03 +02:00 · 2a333f3ad4
--- a/.gitattributes
+++ b/.gitattributes
@@ -40,6 +40,10 @@ run-test-common text eol=lf
 run-timit-test-common text eol=lf
 make_binary_drop_linux text eol=lf

+# Used from Unix / Cygwin 'md5sum -c', needs to have LF line endings:
+Tests/EndToEndTests/Examples/Speech/TIMIT/WriteBottleneck/expected_output_md5sum.*.txt eol=lf
+Tests/EndToEndTests/Examples/Speech/TIMIT/WriteScaledLogLike/expected_output_md5sum.*.txt eol=lf
+
 Makefile text
 *.sln text
 *.vcxproj text
--- a/.gitignore
+++ b/.gitignore
@@ -152,7 +152,9 @@ ModelManifest.xml

 # Python
 *.pyc
-__pychache__/
+__pycache__/
+contrib/Python/doc/_build/*
+contrib/Python/_cntk_default/*

 # =========================
 # Windows detritus
--- a/CNTK.sln
+++ b/CNTK.sln
@@ -976,38 +976,148 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CompositeDataReader", "Sour
 		{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
 	EndProjectSection
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CNTKTextFormatReader", "CNTKTextFormatReader", "{B1110F99-A307-4745-B464-7FD75951645A}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CNTKTextFormatReader", "CNTKTextFormatReader", "{99FAAACE-C360-43CF-B706-20621F164484}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelTraining", "ParallelTraining", "{369656B8-DDFD-412E-901B-DFEBCC31ABE0}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Examples", "Examples", "{629761D1-7A05-409A-B62B-FC1CCC0D6EED}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Image", "Image", "{D4302516-C77F-4FAF-82FB-18DB39F5A53B}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelTraining", "ParallelTraining", "{06BE675D-80DD-419A-8E00-26953EF11F25}"
 	ProjectSection(SolutionItems) = preProject
 		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\SimpleMultiGPU.cntk = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\SimpleMultiGPU.cntk
 	EndProjectSection
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Speech", "Speech", "{CCEFD1F9-E843-43E0-B127-EF73EF90582D}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Speech", "Speech", "{5642F047-490B-4ABD-8113-8563C872B39F}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "NoQuantization", "NoQuantization", "{751BEA27-2187-4BE5-82E7-A3668CFCE7A9}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Image", "Image", "{2B6CCAB6-A92A-483C-9FDB-8412FA4DC42F}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DoublePrecision", "DoublePrecision", "{4084DC9D-0A53-4029-9C86-92AF243C2E09}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Other", "Other", "{225F5A3A-7CAF-4C71-9143-3AD2AC4D47A3}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "MNIST", "MNIST", "{EBD36FD9-FE5B-420E-A572-DC6117300DB3}"
 	ProjectSection(SolutionItems) = preProject
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.cpu.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.gpu.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.cpu.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.gpu.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\run-test = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\run-test
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\testcases.yml
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\run-test-common = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\run-test-common
 	EndProjectSection
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SinglePrecision", "SinglePrecision", "{ACB91DED-FB23-4FF0-A1A1-EBE56B783EFC}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Miscellaneous", "Miscellaneous", "{08D284FA-2914-4B35-A89C-896DBA2B4484}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CIFAR-10", "CIFAR-10", "{95FAC6A0-6AE7-4947-9DFD-498FE71311AD}"
 	ProjectSection(SolutionItems) = preProject
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.cpu.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.gpu.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.cpu.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.gpu.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\run-test = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\run-test
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\testcases.yml
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\run-test-common = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\run-test-common
 	EndProjectSection
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Simple", "Simple", "{BCA65A0C-D93B-4F90-81B1-73048DE04DF1}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{A877E526-89C1-422E-9F90-4DDE84135A36}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\01_Conv.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\01_Conv.cntk
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\02_BatchNormConv.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\02_BatchNormConv.cntk
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\05_ConvLocal.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\05_ConvLocal.cntk
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "01_Convolution", "01_Convolution", "{071D8449-D080-4141-869D-600CC3C2A0BE}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.windows.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\README.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "02_BatchNormConv", "02_BatchNormConv", "{D3A74C52-BC74-4DA3-BE93-8F4241D54EE0}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.linux.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.linux.gpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.windows.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\README.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "05_ConvLocal", "05_ConvLocal", "{EC466625-BC66-41DF-B55A-EB28AFABE24E}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\baseline.linux.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\baseline.linux.gpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\baseline.windows.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\README.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "01_OneHidden", "01_OneHidden", "{34D578DB-0101-45C4-9DF0-37DE9AB87C65}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\baseline.linux.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\baseline.windows.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\README.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "02_Convolution", "02_Convolution", "{1FE04815-E02E-498C-B276-6D058D46D754}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\baseline.linux.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\baseline.windows.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\README.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "03_ConvBatchNorm", "03_ConvBatchNorm", "{2A125ED5-9C8A-4BDF-A200-862104289608}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\README.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{E9207003-B860-4D57-B2CA-09AF52FF191F}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\01_OneHidden.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\01_OneHidden.cntk
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\01_OneHidden.ndl = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\01_OneHidden.ndl
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\02_Convolution.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\02_Convolution.cntk
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\02_Convolution.ndl = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\02_Convolution.ndl
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\03_ConvBatchNorm.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\03_ConvBatchNorm.cntk
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\03_ConvBatchNorm.ndl = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\03_ConvBatchNorm.ndl
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\Macros.ndl = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\Macros.ndl
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Simple2d", "Simple2d", "{50420947-E502-40B4-8739-2C0BADD93BEE}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "MultiGpu", "MultiGpu", "{935E5A95-888D-4922-AB5A-E9C11D65E974}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.linux.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.linux.cpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.linux.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.linux.gpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.windows.cpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.windows.gpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\README.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Simple", "Simple", "{773313DD-69DD-463F-ADC9-E8A902A5223C}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.linux.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.linux.cpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.linux.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.linux.gpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.windows.cpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.windows.gpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\README.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{C8E2EF3B-CCBF-4BDD-8127-2252626FB22B}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Config\Multigpu.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Config\Multigpu.cntk
+		Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Config\Simple.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Config\Simple.cntk
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "QuickE2E", "QuickE2E", "{A4F79A83-DE30-40FA-88F4-86304C89AC7F}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.linux.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.windows.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\Image_QuickE2E.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\Image_QuickE2E.cntk
+		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\README.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Simple", "Simple", "{CC47AF62-2558-455F-81CB-36901AF033B0}"
 	ProjectSection(SolutionItems) = preProject
 		Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.linux.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.linux.cpu.txt
 		Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.linux.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.linux.gpu.txt
@@ -1019,6 +1129,33 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Simple", "Simple", "{BCA65A
 		Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\testcases.yml
 	EndProjectSection
 EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "NoQuantization", "NoQuantization", "{1BA5209D-3EB6-48E7-BE8A-0622315070C0}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{AA14A8DB-669D-447B-A97F-8B726BF30188}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\Data\SimpleDataTrain.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\Data\SimpleDataTrain.txt
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SinglePrecision", "SinglePrecision", "{CA248859-AA91-47D6-AC05-3542AB27E290}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.cpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.gpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.cpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.gpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\run-test = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DoublePrecision", "DoublePrecision", "{8B6E9318-5ED0-49BF-945B-072E0D90A886}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.cpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.gpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.cpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.gpu.txt
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\run-test = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\run-test
+		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\testcases.yml
+	EndProjectSection
+EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SparseDSSM", "SparseDSSM", "{1FB54750-B668-4AC3-966F-ED504020AC06}"
 	ProjectSection(SolutionItems) = preProject
 		Tests\EndToEndTests\Text\SparseDSSM\baseline.cpu.txt = Tests\EndToEndTests\Text\SparseDSSM\baseline.cpu.txt
@@ -1031,23 +1168,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SparseDSSM", "SparseDSSM",
 		Tests\EndToEndTests\Text\SparseDSSM\testcases.yml = Tests\EndToEndTests\Text\SparseDSSM\testcases.yml
 	EndProjectSection
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Image", "Image", "{59980D6E-1732-4809-B17C-6EF4B4F5CF8B}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "QuickE2E", "QuickE2E", "{6FB9C411-C19A-403A-94C2-F7DF393F7612}"
-	ProjectSection(SolutionItems) = preProject
-		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.linux.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.windows.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\Image_QuickE2E.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\Image_QuickE2E.cntk
-		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\README.txt
-		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\run-test
-		Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\testcases.yml
-	EndProjectSection
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{C30742B5-4DBE-4D80-B429-901856E4043D}"
-	ProjectSection(SolutionItems) = preProject
-		Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\Data\SimpleDataTrain.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\Data\SimpleDataTrain.txt
-	EndProjectSection
-EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "WriteCommand", "WriteCommand", "{3E9BD61F-1F0A-4966-BE17-803AEFD1DFA4}"
 	ProjectSection(SolutionItems) = preProject
 		tests\endtoendtests\Speech\DNN\WriteCommand\baseline.cpu.txt = tests\endtoendtests\Speech\DNN\WriteCommand\baseline.cpu.txt
@@ -1526,19 +1646,37 @@ Global
 		{EC780385-7580-4D15-914B-1D878A295CBC} = {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}
 		{D11F76CC-DB6D-4CB4-B3B7-AB139DE2F5FA} = {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}
 		{181664AC-4C95-4798-A923-09B879215B33} = {8656B71D-E24C-4AC2-8BE4-C07B415A3E15}
+		{99FAAACE-C360-43CF-B706-20621F164484} = {6E565B48-1923-49CE-9787-9BBB9D96F4C5}
+		{629761D1-7A05-409A-B62B-FC1CCC0D6EED} = {99FAAACE-C360-43CF-B706-20621F164484}
+		{D4302516-C77F-4FAF-82FB-18DB39F5A53B} = {99FAAACE-C360-43CF-B706-20621F164484}
+		{06BE675D-80DD-419A-8E00-26953EF11F25} = {99FAAACE-C360-43CF-B706-20621F164484}
+		{5642F047-490B-4ABD-8113-8563C872B39F} = {99FAAACE-C360-43CF-B706-20621F164484}
+		{2B6CCAB6-A92A-483C-9FDB-8412FA4DC42F} = {629761D1-7A05-409A-B62B-FC1CCC0D6EED}
+		{225F5A3A-7CAF-4C71-9143-3AD2AC4D47A3} = {629761D1-7A05-409A-B62B-FC1CCC0D6EED}
+		{EBD36FD9-FE5B-420E-A572-DC6117300DB3} = {2B6CCAB6-A92A-483C-9FDB-8412FA4DC42F}
+		{08D284FA-2914-4B35-A89C-896DBA2B4484} = {2B6CCAB6-A92A-483C-9FDB-8412FA4DC42F}
+		{95FAC6A0-6AE7-4947-9DFD-498FE71311AD} = {08D284FA-2914-4B35-A89C-896DBA2B4484}
+		{A877E526-89C1-422E-9F90-4DDE84135A36} = {95FAC6A0-6AE7-4947-9DFD-498FE71311AD}
+		{071D8449-D080-4141-869D-600CC3C2A0BE} = {95FAC6A0-6AE7-4947-9DFD-498FE71311AD}
+		{D3A74C52-BC74-4DA3-BE93-8F4241D54EE0} = {95FAC6A0-6AE7-4947-9DFD-498FE71311AD}
+		{EC466625-BC66-41DF-B55A-EB28AFABE24E} = {95FAC6A0-6AE7-4947-9DFD-498FE71311AD}
+		{34D578DB-0101-45C4-9DF0-37DE9AB87C65} = {EBD36FD9-FE5B-420E-A572-DC6117300DB3}
+		{1FE04815-E02E-498C-B276-6D058D46D754} = {EBD36FD9-FE5B-420E-A572-DC6117300DB3}
+		{2A125ED5-9C8A-4BDF-A200-862104289608} = {EBD36FD9-FE5B-420E-A572-DC6117300DB3}
+		{E9207003-B860-4D57-B2CA-09AF52FF191F} = {EBD36FD9-FE5B-420E-A572-DC6117300DB3}
+		{50420947-E502-40B4-8739-2C0BADD93BEE} = {225F5A3A-7CAF-4C71-9143-3AD2AC4D47A3}
+		{935E5A95-888D-4922-AB5A-E9C11D65E974} = {50420947-E502-40B4-8739-2C0BADD93BEE}
+		{773313DD-69DD-463F-ADC9-E8A902A5223C} = {50420947-E502-40B4-8739-2C0BADD93BEE}
+		{C8E2EF3B-CCBF-4BDD-8127-2252626FB22B} = {50420947-E502-40B4-8739-2C0BADD93BEE}
+		{A4F79A83-DE30-40FA-88F4-86304C89AC7F} = {D4302516-C77F-4FAF-82FB-18DB39F5A53B}
+		{CC47AF62-2558-455F-81CB-36901AF033B0} = {5642F047-490B-4ABD-8113-8563C872B39F}
+		{1BA5209D-3EB6-48E7-BE8A-0622315070C0} = {06BE675D-80DD-419A-8E00-26953EF11F25}
+		{AA14A8DB-669D-447B-A97F-8B726BF30188} = {06BE675D-80DD-419A-8E00-26953EF11F25}
+		{CA248859-AA91-47D6-AC05-3542AB27E290} = {1BA5209D-3EB6-48E7-BE8A-0622315070C0}
+		{8B6E9318-5ED0-49BF-945B-072E0D90A886} = {1BA5209D-3EB6-48E7-BE8A-0622315070C0}
 		{86883653-8A61-4038-81A0-2379FAE4200A} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
 		{7B7A563D-AA8E-4660-A805-D50235A02120} = {33EBFE78-A1A8-4961-8938-92A271941F94}
-		{B1110F99-A307-4745-B464-7FD75951645A} = {6E565B48-1923-49CE-9787-9BBB9D96F4C5}
-		{369656B8-DDFD-412E-901B-DFEBCC31ABE0} = {B1110F99-A307-4745-B464-7FD75951645A}
-		{CCEFD1F9-E843-43E0-B127-EF73EF90582D} = {B1110F99-A307-4745-B464-7FD75951645A}
-		{751BEA27-2187-4BE5-82E7-A3668CFCE7A9} = {369656B8-DDFD-412E-901B-DFEBCC31ABE0}
-		{4084DC9D-0A53-4029-9C86-92AF243C2E09} = {751BEA27-2187-4BE5-82E7-A3668CFCE7A9}
-		{ACB91DED-FB23-4FF0-A1A1-EBE56B783EFC} = {751BEA27-2187-4BE5-82E7-A3668CFCE7A9}
-		{BCA65A0C-D93B-4F90-81B1-73048DE04DF1} = {CCEFD1F9-E843-43E0-B127-EF73EF90582D}
 		{1FB54750-B668-4AC3-966F-ED504020AC06} = {8656B71D-E24C-4AC2-8BE4-C07B415A3E15}
-		{59980D6E-1732-4809-B17C-6EF4B4F5CF8B} = {B1110F99-A307-4745-B464-7FD75951645A}
-		{6FB9C411-C19A-403A-94C2-F7DF393F7612} = {59980D6E-1732-4809-B17C-6EF4B4F5CF8B}
-		{C30742B5-4DBE-4D80-B429-901856E4043D} = {369656B8-DDFD-412E-901B-DFEBCC31ABE0}
 		{3E9BD61F-1F0A-4966-BE17-803AEFD1DFA4} = {6994C86D-A672-4254-824A-51F4DFEB807F}
 		{5560DDD4-1E6E-4F41-B9BD-F52A19DF0B31} = {6994C86D-A672-4254-824A-51F4DFEB807F}
 		{9834E864-A8CD-4D28-A3C9-F79FE0F421AE} = {6994C86D-A672-4254-824A-51F4DFEB807F}
--- a/Examples/Image/MNIST/Config/02_Convolution.cntk
+++ b/Examples/Image/MNIST/Config/02_Convolution.cntk
@@ -26,7 +26,8 @@ ndlMacros = "$ConfigDir$/Macros.ndl"
 traceLevel=1
 numMBsToShowResult=500

-prefetch=true
+# Note: turn off prefetching; known to crash UCIFastReader occasionally.
+prefetch=false

 # If set to true, always initialize the network on CPU, making initialization consistent across CPU and GPU targets (for testing).
 initOnCPUOnly=true
--- a/Examples/Image/Miscellaneous/CIFAR-10/01_Conv.cntk
+++ b/Examples/Image/Miscellaneous/CIFAR-10/01_Conv.cntk
@ -16,7 +16,8 @@ imageLayout = "cudnn"
 # If set to true, always initialize the network on CPU, making initialization consistent across CPU and GPU targets (for testing).
 initOnCPUOnly=true

-prefetch = "true"
+# Note: turn off prefetching; known to crash UCIFastReader occasionally.
+prefetch = "false"

 command = Train:Test

--- a/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.cntk
+++ b/Examples/Image/Miscellaneous/CIFAR-10/02_BatchNormConv.cntk
@ -16,7 +16,8 @@ imageLayout = "cudnn"
 # If set to true, always initialize the network on CPU, making initialization consistent across CPU and GPU targets (for testing).
 initOnCPUOnly=true

-prefetch = "true"
+# Note: turn off prefetching; known to crash UCIFastReader occasionally.
+prefetch = "false"

 command = Train:Test

--- a/Examples/Image/Miscellaneous/CIFAR-10/05_ConvLocal.cntk
+++ b/Examples/Image/Miscellaneous/CIFAR-10/05_ConvLocal.cntk
@ -13,7 +13,8 @@ imageLayout = "cudnn"
 # override the above as follows when running on CPU:
 # deviceId = -1

-prefetch = "true"
+# Note: turn off prefetching; known to crash UCIFastReader occasionally.
+prefetch = "false"

 command = Train:Test

--- a/Examples/Image/Miscellaneous/ImageNet/ResNet/README.md
+++ b/Examples/Image/Miscellaneous/ImageNet/ResNet/README.md
@ -1,12 +1,22 @@
 # CNTK example: ImageNet ResNet 

-**Disclaimer: network configurations and experiment settings in this this folder try to follow those published in the [ResNet paper](http://arxiv.org/abs/1512.03385) as close as possible. However, these samples are NOT endorsed or verified by the researchers who published the original work. It is NOT guaranteed that you get the same (or even close) results as those in the paper.**
-
 ## Overview

 |Data:     |The ILSVRC2012 dataset (http://www.image-net.org/challenges/LSVRC/2012/) of images.
 |:---------|:---
 |Purpose   |This example demonstrates usage of the NDL (Network Description Language) to define networks similar to ResNet.
-|Network   |NDLNetworkBuilder, deep convolutional networks resembling ResNet networks.
+|Network   |NDLNetworkBuilder, deep convolutional residual networks (ResNet).
 |Training  |Stochastic gradient descent with momentum.

+## Details
+The network configurations and experiment settings in this folder resemble the ones in the original [ResNet paper](http://arxiv.org/abs/1512.03385) with a few minor changes inspired by [this work](https://github.com/facebook/fb.resnet.torch).
+The following table contains results as well as links to pre-trained models that can be used in various applications.
+
+| Network       | Top-1 error | Top-5 error | Model
+| ------------- | ----------- | ----------- | ----------
+| ResNet-18     | 29.57       | 10.41       | [Download](https://www.cntk.ai/resnet/ResNet_18.model)
+| ResNet-34     | 27.31       | 8.97        | [Download](https://www.cntk.ai/resnet/ResNet_34.model)
+| ResNet-50     | 24.74       | 7.56        | [Download](https://www.cntk.ai/resnet/ResNet_50.model)
+
+## Notes
+This work is an implementation of ResNets in CNTK. If you are interested in the original implementation of ResNet, follow [this link](https://github.com/KaimingHe/deep-residual-networks).
--- a/Examples/Other/Simple2d/Data/SimpleDataTest_cntk_text.txt
+++ b/Examples/Other/Simple2d/Data/SimpleDataTest_cntk_text.txt
@ -0,0 +1,603 @@
+|labels 0 1	|features -1 -1
+|labels 0 1	|features -1 -0.99
+|labels 0 1	|features -1 -0.98
+|labels 0 1	|features -1 -0.97
+|labels 0 1	|features -1 -0.96
+|labels 0 1	|features -1 -0.95
+|labels 0 1	|features -1 -0.94
+|labels 0 1	|features -1 -0.93
+|labels 0 1	|features -1 -0.92
+|labels 0 1	|features -1 -0.91
+|labels 0 1	|features -1 -0.9
+|labels 0 1	|features -1 -0.89
+|labels 0 1	|features -1 -0.88
+|labels 0 1	|features -1 -0.87
+|labels 0 1	|features -1 -0.86
+|labels 0 1	|features -1 -0.85
+|labels 0 1	|features -1 -0.84
+|labels 0 1	|features -1 -0.83
+|labels 0 1	|features -1 -0.82
+|labels 0 1	|features -1 -0.81
+|labels 0 1	|features -1 -0.8
+|labels 0 1	|features -1 -0.79
+|labels 0 1	|features -1 -0.78
+|labels 0 1	|features -1 -0.77
+|labels 0 1	|features -1 -0.76
+|labels 0 1	|features -1 -0.75
+|labels 0 1	|features -1 -0.74
+|labels 0 1	|features -1 -0.73
+|labels 0 1	|features -1 -0.72
+|labels 0 1	|features -1 -0.71
+|labels 0 1	|features -1 -0.7
+|labels 0 1	|features -1 -0.69
+|labels 0 1	|features -1 -0.68
+|labels 0 1	|features -1 -0.67
+|labels 0 1	|features -1 -0.66
+|labels 0 1	|features -1 -0.65
+|labels 0 1	|features -1 -0.64
+|labels 0 1	|features -1 -0.63
+|labels 0 1	|features -1 -0.62
+|labels 0 1	|features -1 -0.61
+|labels 0 1	|features -1 -0.6
+|labels 0 1	|features -1 -0.59
+|labels 0 1	|features -1 -0.58
+|labels 0 1	|features -1 -0.57
+|labels 0 1	|features -1 -0.56
+|labels 0 1	|features -1 -0.55
+|labels 0 1	|features -1 -0.54
+|labels 0 1	|features -1 -0.53
+|labels 0 1	|features -1 -0.52
+|labels 0 1	|features -1 -0.51
+|labels 0 1	|features -1 -0.5
+|labels 0 1	|features -1 -0.49
+|labels 0 1	|features -1 -0.48
+|labels 0 1	|features -1 -0.47
+|labels 0 1	|features -1 -0.46
+|labels 0 1	|features -1 -0.45
+|labels 0 1	|features -1 -0.44
+|labels 0 1	|features -1 -0.43
+|labels 0 1	|features -1 -0.42
+|labels 0 1	|features -1 -0.41
+|labels 0 1	|features -1 -0.4
+|labels 0 1	|features -1 -0.39
+|labels 0 1	|features -1 -0.38
+|labels 0 1	|features -1 -0.37
+|labels 0 1	|features -1 -0.36
+|labels 0 1	|features -1 -0.35
+|labels 0 1	|features -1 -0.34
+|labels 0 1	|features -1 -0.33
+|labels 0 1	|features -1 -0.32
+|labels 0 1	|features -1 -0.31
+|labels 0 1	|features -1 -0.3
+|labels 0 1	|features -1 -0.29
+|labels 0 1	|features -1 -0.28
+|labels 0 1	|features -1 -0.27
+|labels 0 1	|features -1 -0.26
+|labels 0 1	|features -1 -0.25
+|labels 0 1	|features -1 -0.24
+|labels 0 1	|features -1 -0.23
+|labels 0 1	|features -1 -0.22
+|labels 0 1	|features -1 -0.21
+|labels 0 1	|features -1 -0.2
+|labels 0 1	|features -1 -0.19
+|labels 0 1	|features -1 -0.18
+|labels 0 1	|features -1 -0.17
+|labels 0 1	|features -1 -0.16
+|labels 0 1	|features -1 -0.15
+|labels 0 1	|features -1 -0.14
+|labels 0 1	|features -1 -0.13
+|labels 0 1	|features -1 -0.12
+|labels 0 1	|features -1 -0.11
+|labels 0 1	|features -1 -0.1
+|labels 1 0	|features -1 -0.09
+|labels 1 0	|features -1 -0.08
+|labels 1 0	|features -1 -0.07
+|labels 1 0	|features -1 -0.06
+|labels 1 0	|features -1 -0.05
+|labels 1 0	|features -1 -0.04
+|labels 1 0	|features -1 -0.03
+|labels 1 0	|features -1 -0.02
+|labels 1 0	|features -1 -0.01
+|labels 1 0	|features -1 0
+|labels 1 0	|features -1 0.01
+|labels 1 0	|features -1 0.02
+|labels 1 0	|features -1 0.03
+|labels 1 0	|features -1 0.04
+|labels 1 0	|features -1 0.05
+|labels 1 0	|features -1 0.06
+|labels 1 0	|features -1 0.07
+|labels 1 0	|features -1 0.08
+|labels 1 0	|features -1 0.09
+|labels 1 0	|features -1 0.1
+|labels 1 0	|features -1 0.11
+|labels 1 0	|features -1 0.12
+|labels 1 0	|features -1 0.13
+|labels 1 0	|features -1 0.14
+|labels 1 0	|features -1 0.15
+|labels 1 0	|features -1 0.16
+|labels 1 0	|features -1 0.17
+|labels 1 0	|features -1 0.18
+|labels 1 0	|features -1 0.19
+|labels 1 0	|features -1 0.2
+|labels 1 0	|features -1 0.21
+|labels 1 0	|features -1 0.22
+|labels 1 0	|features -1 0.23
+|labels 1 0	|features -1 0.24
+|labels 1 0	|features -1 0.25
+|labels 1 0	|features -1 0.26
+|labels 1 0	|features -1 0.27
+|labels 1 0	|features -1 0.28
+|labels 1 0	|features -1 0.29
+|labels 1 0	|features -1 0.3
+|labels 1 0	|features -1 0.31
+|labels 1 0	|features -1 0.32
+|labels 1 0	|features -1 0.33
+|labels 1 0	|features -1 0.34
+|labels 1 0	|features -1 0.35
+|labels 1 0	|features -1 0.36
+|labels 1 0	|features -1 0.37
+|labels 1 0	|features -1 0.38
+|labels 1 0	|features -1 0.39
+|labels 1 0	|features -1 0.4
+|labels 1 0	|features -1 0.41
+|labels 1 0	|features -1 0.42
+|labels 1 0	|features -1 0.43
+|labels 1 0	|features -1 0.44
+|labels 1 0	|features -1 0.45
+|labels 1 0	|features -1 0.46
+|labels 1 0	|features -1 0.47
+|labels 1 0	|features -1 0.48
+|labels 1 0	|features -1 0.49
+|labels 1 0	|features -1 0.5
+|labels 1 0	|features -1 0.51
+|labels 1 0	|features -1 0.52
+|labels 1 0	|features -1 0.53
+|labels 1 0	|features -1 0.54
+|labels 1 0	|features -1 0.55
+|labels 1 0	|features -1 0.56
+|labels 1 0	|features -1 0.57
+|labels 1 0	|features -1 0.58
+|labels 1 0	|features -1 0.59
+|labels 1 0	|features -1 0.6
+|labels 1 0	|features -1 0.61
+|labels 1 0	|features -1 0.62
+|labels 1 0	|features -1 0.63
+|labels 1 0	|features -1 0.64
+|labels 1 0	|features -1 0.65
+|labels 1 0	|features -1 0.66
+|labels 1 0	|features -1 0.67
+|labels 1 0	|features -1 0.68
+|labels 1 0	|features -1 0.69
+|labels 1 0	|features -1 0.7
+|labels 1 0	|features -1 0.71
+|labels 1 0	|features -1 0.72
+|labels 1 0	|features -1 0.73
+|labels 1 0	|features -1 0.74
+|labels 1 0	|features -1 0.75
+|labels 1 0	|features -1 0.76
+|labels 1 0	|features -1 0.77
+|labels 1 0	|features -1 0.78
+|labels 1 0	|features -1 0.79
+|labels 1 0	|features -1 0.8
+|labels 1 0	|features -1 0.81
+|labels 1 0	|features -1 0.82
+|labels 1 0	|features -1 0.83
+|labels 1 0	|features -1 0.84
+|labels 1 0	|features -1 0.85
+|labels 1 0	|features -1 0.86
+|labels 1 0	|features -1 0.87
+|labels 1 0	|features -1 0.88
+|labels 1 0	|features -1 0.89
+|labels 1 0	|features -1 0.9
+|labels 1 0	|features -1 0.91
+|labels 1 0	|features -1 0.92
+|labels 1 0	|features -1 0.93
+|labels 1 0	|features -1 0.94
+|labels 1 0	|features -1 0.95
+|labels 1 0	|features -1 0.96
+|labels 1 0	|features -1 0.97
+|labels 1 0	|features -1 0.98
+|labels 1 0	|features -1 0.99
+|labels 1 0	|features -1 1
+|labels 0 1	|features 0 -1
+|labels 0 1	|features 0 -0.99
+|labels 0 1	|features 0 -0.98
+|labels 0 1	|features 0 -0.97
+|labels 0 1	|features 0 -0.96
+|labels 0 1	|features 0 -0.95
+|labels 0 1	|features 0 -0.94
+|labels 0 1	|features 0 -0.93
+|labels 0 1	|features 0 -0.92
+|labels 0 1	|features 0 -0.91
+|labels 0 1	|features 0 -0.9
+|labels 0 1	|features 0 -0.89
+|labels 0 1	|features 0 -0.88
+|labels 0 1	|features 0 -0.87
+|labels 0 1	|features 0 -0.86
+|labels 0 1	|features 0 -0.85
+|labels 0 1	|features 0 -0.84
+|labels 0 1	|features 0 -0.83
+|labels 0 1	|features 0 -0.82
+|labels 0 1	|features 0 -0.81
+|labels 0 1	|features 0 -0.8
+|labels 0 1	|features 0 -0.79
+|labels 0 1	|features 0 -0.78
+|labels 0 1	|features 0 -0.77
+|labels 0 1	|features 0 -0.76
+|labels 0 1	|features 0 -0.75
+|labels 0 1	|features 0 -0.74
+|labels 0 1	|features 0 -0.73
+|labels 0 1	|features 0 -0.72
+|labels 0 1	|features 0 -0.71
+|labels 0 1	|features 0 -0.7
+|labels 0 1	|features 0 -0.69
+|labels 0 1	|features 0 -0.68
+|labels 0 1	|features 0 -0.67
+|labels 0 1	|features 0 -0.66
+|labels 0 1	|features 0 -0.65
+|labels 0 1	|features 0 -0.64
+|labels 0 1	|features 0 -0.63
+|labels 0 1	|features 0 -0.62
+|labels 0 1	|features 0 -0.61
+|labels 0 1	|features 0 -0.6
+|labels 0 1	|features 0 -0.59
+|labels 0 1	|features 0 -0.58
+|labels 0 1	|features 0 -0.57
+|labels 0 1	|features 0 -0.56
+|labels 0 1	|features 0 -0.55
+|labels 0 1	|features 0 -0.54
+|labels 0 1	|features 0 -0.53
+|labels 0 1	|features 0 -0.52
+|labels 0 1	|features 0 -0.51
+|labels 0 1	|features 0 -0.5
+|labels 0 1	|features 0 -0.49
+|labels 0 1	|features 0 -0.48
+|labels 0 1	|features 0 -0.47
+|labels 0 1	|features 0 -0.46
+|labels 0 1	|features 0 -0.45
+|labels 0 1	|features 0 -0.44
+|labels 0 1	|features 0 -0.43
+|labels 0 1	|features 0 -0.42
+|labels 0 1	|features 0 -0.41
+|labels 0 1	|features 0 -0.4
+|labels 0 1	|features 0 -0.39
+|labels 0 1	|features 0 -0.38
+|labels 0 1	|features 0 -0.37
+|labels 0 1	|features 0 -0.36
+|labels 0 1	|features 0 -0.35
+|labels 0 1	|features 0 -0.34
+|labels 0 1	|features 0 -0.33
+|labels 0 1	|features 0 -0.32
+|labels 0 1	|features 0 -0.31
+|labels 0 1	|features 0 -0.3
+|labels 0 1	|features 0 -0.29
+|labels 0 1	|features 0 -0.28
+|labels 0 1	|features 0 -0.27
+|labels 0 1	|features 0 -0.26
+|labels 0 1	|features 0 -0.25
+|labels 0 1	|features 0 -0.24
+|labels 0 1	|features 0 -0.23
+|labels 0 1	|features 0 -0.22
+|labels 0 1	|features 0 -0.21
+|labels 0 1	|features 0 -0.2
+|labels 0 1	|features 0 -0.19
+|labels 0 1	|features 0 -0.18
+|labels 0 1	|features 0 -0.17
+|labels 0 1	|features 0 -0.16
+|labels 0 1	|features 0 -0.15
+|labels 0 1	|features 0 -0.14
+|labels 0 1	|features 0 -0.13
+|labels 0 1	|features 0 -0.12
+|labels 0 1	|features 0 -0.11
+|labels 0 1	|features 0 -0.1
+|labels 1 0	|features 0 -0.09
+|labels 1 0	|features 0 -0.08
+|labels 1 0	|features 0 -0.07
+|labels 1 0	|features 0 -0.06
+|labels 1 0	|features 0 -0.05
+|labels 1 0	|features 0 -0.04
+|labels 1 0	|features 0 -0.03
+|labels 1 0	|features 0 -0.02
+|labels 1 0	|features 0 -0.01
+|labels 1 0	|features 0 0
+|labels 1 0	|features 0 0.01
+|labels 1 0	|features 0 0.02
+|labels 1 0	|features 0 0.03
+|labels 1 0	|features 0 0.04
+|labels 1 0	|features 0 0.05
+|labels 1 0	|features 0 0.06
+|labels 1 0	|features 0 0.07
+|labels 1 0	|features 0 0.08
+|labels 1 0	|features 0 0.09
+|labels 1 0	|features 0 0.1
+|labels 1 0	|features 0 0.11
+|labels 1 0	|features 0 0.12
+|labels 1 0	|features 0 0.13
+|labels 1 0	|features 0 0.14
+|labels 1 0	|features 0 0.15
+|labels 1 0	|features 0 0.16
+|labels 1 0	|features 0 0.17
+|labels 1 0	|features 0 0.18
+|labels 1 0	|features 0 0.19
+|labels 1 0	|features 0 0.2
+|labels 1 0	|features 0 0.21
+|labels 1 0	|features 0 0.22
+|labels 1 0	|features 0 0.23
+|labels 1 0	|features 0 0.24
+|labels 1 0	|features 0 0.25
+|labels 1 0	|features 0 0.26
+|labels 1 0	|features 0 0.27
+|labels 1 0	|features 0 0.28
+|labels 1 0	|features 0 0.29
+|labels 1 0	|features 0 0.3
+|labels 1 0	|features 0 0.31
+|labels 1 0	|features 0 0.32
+|labels 1 0	|features 0 0.33
+|labels 1 0	|features 0 0.34
+|labels 1 0	|features 0 0.35
+|labels 1 0	|features 0 0.36
+|labels 1 0	|features 0 0.37
+|labels 1 0	|features 0 0.38
+|labels 1 0	|features 0 0.39
+|labels 1 0	|features 0 0.4
+|labels 1 0	|features 0 0.41
+|labels 1 0	|features 0 0.42
+|labels 1 0	|features 0 0.43
+|labels 1 0	|features 0 0.44
+|labels 1 0	|features 0 0.45
+|labels 1 0	|features 0 0.46
+|labels 1 0	|features 0 0.47
+|labels 1 0	|features 0 0.48
+|labels 1 0	|features 0 0.49
+|labels 1 0	|features 0 0.5
+|labels 1 0	|features 0 0.51
+|labels 1 0	|features 0 0.52
+|labels 1 0	|features 0 0.53
+|labels 1 0	|features 0 0.54
+|labels 1 0	|features 0 0.55
+|labels 1 0	|features 0 0.56
+|labels 1 0	|features 0 0.57
+|labels 1 0	|features 0 0.58
+|labels 1 0	|features 0 0.59
+|labels 1 0	|features 0 0.6
+|labels 1 0	|features 0 0.61
+|labels 1 0	|features 0 0.62
+|labels 1 0	|features 0 0.63
+|labels 1 0	|features 0 0.64
+|labels 1 0	|features 0 0.65
+|labels 1 0	|features 0 0.66
+|labels 1 0	|features 0 0.67
+|labels 1 0	|features 0 0.68
+|labels 1 0	|features 0 0.69
+|labels 1 0	|features 0 0.7
+|labels 1 0	|features 0 0.71
+|labels 1 0	|features 0 0.72
+|labels 1 0	|features 0 0.73
+|labels 1 0	|features 0 0.74
+|labels 1 0	|features 0 0.75
+|labels 1 0	|features 0 0.76
+|labels 1 0	|features 0 0.77
+|labels 1 0	|features 0 0.78
+|labels 1 0	|features 0 0.79
+|labels 1 0	|features 0 0.8
+|labels 1 0	|features 0 0.81
+|labels 1 0	|features 0 0.82
+|labels 1 0	|features 0 0.83
+|labels 1 0	|features 0 0.84
+|labels 1 0	|features 0 0.85
+|labels 1 0	|features 0 0.86
+|labels 1 0	|features 0 0.87
+|labels 1 0	|features 0 0.88
+|labels 1 0	|features 0 0.89
+|labels 1 0	|features 0 0.9
+|labels 1 0	|features 0 0.91
+|labels 1 0	|features 0 0.92
+|labels 1 0	|features 0 0.93
+|labels 1 0	|features 0 0.94
+|labels 1 0	|features 0 0.95
+|labels 1 0	|features 0 0.96
+|labels 1 0	|features 0 0.97
+|labels 1 0	|features 0 0.98
+|labels 1 0	|features 0 0.99
+|labels 1 0	|features 0 1
+|labels 0 1	|features 1 -1
+|labels 0 1	|features 1 -0.99
+|labels 0 1	|features 1 -0.98
+|labels 0 1	|features 1 -0.97
+|labels 0 1	|features 1 -0.96
+|labels 0 1	|features 1 -0.95
+|labels 0 1	|features 1 -0.94
+|labels 0 1	|features 1 -0.93
+|labels 0 1	|features 1 -0.92
+|labels 0 1	|features 1 -0.91
+|labels 0 1	|features 1 -0.9
+|labels 0 1	|features 1 -0.89
+|labels 0 1	|features 1 -0.88
+|labels 0 1	|features 1 -0.87
+|labels 0 1	|features 1 -0.86
+|labels 0 1	|features 1 -0.85
+|labels 0 1	|features 1 -0.84
+|labels 0 1	|features 1 -0.83
+|labels 0 1	|features 1 -0.82
+|labels 0 1	|features 1 -0.81
+|labels 0 1	|features 1 -0.8
+|labels 0 1	|features 1 -0.79
+|labels 0 1	|features 1 -0.78
+|labels 0 1	|features 1 -0.77
+|labels 0 1	|features 1 -0.76
+|labels 0 1	|features 1 -0.75
+|labels 0 1	|features 1 -0.74
+|labels 0 1	|features 1 -0.73
+|labels 0 1	|features 1 -0.72
+|labels 0 1	|features 1 -0.71
+|labels 0 1	|features 1 -0.7
+|labels 0 1	|features 1 -0.69
+|labels 0 1	|features 1 -0.68
+|labels 0 1	|features 1 -0.67
+|labels 0 1	|features 1 -0.66
+|labels 0 1	|features 1 -0.65
+|labels 0 1	|features 1 -0.64
+|labels 0 1	|features 1 -0.63
+|labels 0 1	|features 1 -0.62
+|labels 0 1	|features 1 -0.61
+|labels 0 1	|features 1 -0.6
+|labels 0 1	|features 1 -0.59
+|labels 0 1	|features 1 -0.58
+|labels 0 1	|features 1 -0.57
+|labels 0 1	|features 1 -0.56
+|labels 0 1	|features 1 -0.55
+|labels 0 1	|features 1 -0.54
+|labels 0 1	|features 1 -0.53
+|labels 0 1	|features 1 -0.52
+|labels 0 1	|features 1 -0.51
+|labels 0 1	|features 1 -0.5
+|labels 0 1	|features 1 -0.49
+|labels 0 1	|features 1 -0.48
+|labels 0 1	|features 1 -0.47
+|labels 0 1	|features 1 -0.46
+|labels 0 1	|features 1 -0.45
+|labels 0 1	|features 1 -0.44
+|labels 0 1	|features 1 -0.43
+|labels 0 1	|features 1 -0.42
+|labels 0 1	|features 1 -0.41
+|labels 0 1	|features 1 -0.4
+|labels 0 1	|features 1 -0.39
+|labels 0 1	|features 1 -0.38
+|labels 0 1	|features 1 -0.37
+|labels 0 1	|features 1 -0.36
+|labels 0 1	|features 1 -0.35
+|labels 0 1	|features 1 -0.34
+|labels 0 1	|features 1 -0.33
+|labels 0 1	|features 1 -0.32
+|labels 0 1	|features 1 -0.31
+|labels 0 1	|features 1 -0.3
+|labels 0 1	|features 1 -0.29
+|labels 0 1	|features 1 -0.28
+|labels 0 1	|features 1 -0.27
+|labels 0 1	|features 1 -0.26
+|labels 0 1	|features 1 -0.25
+|labels 0 1	|features 1 -0.24
+|labels 0 1	|features 1 -0.23
+|labels 0 1	|features 1 -0.22
+|labels 0 1	|features 1 -0.21
+|labels 0 1	|features 1 -0.2
+|labels 0 1	|features 1 -0.19
+|labels 0 1	|features 1 -0.18
+|labels 0 1	|features 1 -0.17
+|labels 0 1	|features 1 -0.16
+|labels 0 1	|features 1 -0.15
+|labels 0 1	|features 1 -0.14
+|labels 0 1	|features 1 -0.13
+|labels 0 1	|features 1 -0.12
+|labels 0 1	|features 1 -0.11
+|labels 0 1	|features 1 -0.1
+|labels 0 1	|features 1 -0.09
+|labels 0 1	|features 1 -0.08
+|labels 0 1	|features 1 -0.07
+|labels 0 1	|features 1 -0.06
+|labels 0 1	|features 1 -0.05
+|labels 0 1	|features 1 -0.04
+|labels 0 1	|features 1 -0.03
+|labels 0 1	|features 1 -0.02
+|labels 0 1	|features 1 -0.01
+|labels 1 0	|features 1 0
+|labels 1 0	|features 1 0.01
+|labels 1 0	|features 1 0.02
+|labels 1 0	|features 1 0.03
+|labels 1 0	|features 1 0.04
+|labels 1 0	|features 1 0.05
+|labels 1 0	|features 1 0.06
+|labels 1 0	|features 1 0.07
+|labels 1 0	|features 1 0.08
+|labels 1 0	|features 1 0.09
+|labels 1 0	|features 1 0.1
+|labels 1 0	|features 1 0.11
+|labels 1 0	|features 1 0.12
+|labels 1 0	|features 1 0.13
+|labels 1 0	|features 1 0.14
+|labels 1 0	|features 1 0.15
+|labels 1 0	|features 1 0.16
+|labels 1 0	|features 1 0.17
+|labels 1 0	|features 1 0.18
+|labels 1 0	|features 1 0.19
+|labels 1 0	|features 1 0.2
+|labels 1 0	|features 1 0.21
+|labels 1 0	|features 1 0.22
+|labels 1 0	|features 1 0.23
+|labels 1 0	|features 1 0.24
+|labels 1 0	|features 1 0.25
+|labels 1 0	|features 1 0.26
+|labels 1 0	|features 1 0.27
+|labels 1 0	|features 1 0.28
+|labels 1 0	|features 1 0.29
+|labels 1 0	|features 1 0.3
+|labels 1 0	|features 1 0.31
+|labels 1 0	|features 1 0.32
+|labels 1 0	|features 1 0.33
+|labels 1 0	|features 1 0.34
+|labels 1 0	|features 1 0.35
+|labels 1 0	|features 1 0.36
+|labels 1 0	|features 1 0.37
+|labels 1 0	|features 1 0.38
+|labels 1 0	|features 1 0.39
+|labels 1 0	|features 1 0.4
+|labels 1 0	|features 1 0.41
+|labels 1 0	|features 1 0.42
+|labels 1 0	|features 1 0.43
+|labels 1 0	|features 1 0.44
+|labels 1 0	|features 1 0.45
+|labels 1 0	|features 1 0.46
+|labels 1 0	|features 1 0.47
+|labels 1 0	|features 1 0.48
+|labels 1 0	|features 1 0.49
+|labels 1 0	|features 1 0.5
+|labels 1 0	|features 1 0.51
+|labels 1 0	|features 1 0.52
+|labels 1 0	|features 1 0.53
+|labels 1 0	|features 1 0.54
+|labels 1 0	|features 1 0.55
+|labels 1 0	|features 1 0.56
+|labels 1 0	|features 1 0.57
+|labels 1 0	|features 1 0.58
+|labels 1 0	|features 1 0.59
+|labels 1 0	|features 1 0.6
+|labels 1 0	|features 1 0.61
+|labels 1 0	|features 1 0.62
+|labels 1 0	|features 1 0.63
+|labels 1 0	|features 1 0.64
+|labels 1 0	|features 1 0.65
+|labels 1 0	|features 1 0.66
+|labels 1 0	|features 1 0.67
+|labels 1 0	|features 1 0.68
+|labels 1 0	|features 1 0.69
+|labels 1 0	|features 1 0.7
+|labels 1 0	|features 1 0.71
+|labels 1 0	|features 1 0.72
+|labels 1 0	|features 1 0.73
+|labels 1 0	|features 1 0.74
+|labels 1 0	|features 1 0.75
+|labels 1 0	|features 1 0.76
+|labels 1 0	|features 1 0.77
+|labels 1 0	|features 1 0.78
+|labels 1 0	|features 1 0.79
+|labels 1 0	|features 1 0.8
+|labels 1 0	|features 1 0.81
+|labels 1 0	|features 1 0.82
+|labels 1 0	|features 1 0.83
+|labels 1 0	|features 1 0.84
+|labels 1 0	|features 1 0.85
+|labels 1 0	|features 1 0.86
+|labels 1 0	|features 1 0.87
+|labels 1 0	|features 1 0.88
+|labels 1 0	|features 1 0.89
+|labels 1 0	|features 1 0.9
+|labels 1 0	|features 1 0.91
+|labels 1 0	|features 1 0.92
+|labels 1 0	|features 1 0.93
+|labels 1 0	|features 1 0.94
+|labels 1 0	|features 1 0.95
+|labels 1 0	|features 1 0.96
+|labels 1 0	|features 1 0.97
+|labels 1 0	|features 1 0.98
+|labels 1 0	|features 1 0.99
+|labels 1 0	|features 1 1
--- a/Examples/Other/Simple2d/Data/SimpleDataTrain_cntk_text.txt
+++ b/Examples/Other/Simple2d/Data/SimpleDataTrain_cntk_text.txt
--- a/1
+++ b/1
@ -247,6 +247,7 @@ READER_SRC =\
 	$(SOURCEDIR)/Readers/ReaderLib/TruncatedBpttPacker.cpp \
 	$(SOURCEDIR)/Readers/ReaderLib/PackerBase.cpp \
 	$(SOURCEDIR)/Readers/ReaderLib/FramePacker.cpp \
+    $(SOURCEDIR)/Readers/ReaderLib/ChunkCache.cpp \

 COMMON_SRC =\
 	$(SOURCEDIR)/Common/Config.cpp \
--- a/README.md
+++ b/README.md
@ -1,6 +1,10 @@
 # CNTK

 ## Latest news
+*2016-05-16.* An example illustrating [Using CNTK with ResNet](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Miscellaneous/ImageNet/ResNet) is added to the codebase. The example contains some pre-trained models that can be used in various applications.
+
+*2016-05-16.* CNTK Wiki now has an [FAQ Page](https://github.com/Microsoft/CNTK/wiki/CNTK-FAQ).
+
 *2016-05-05.* CNTK now supports *BlockMomentum* Stochastic Gradient Descent (SGD) algorithm. 
 See the details in the [Multiple GPUs and machines Wiki section](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines)

@ -10,8 +14,6 @@ See the description in the [Image Reader Wiki section](https://github.com/Micros
 *2016-04-25.* V 1.1 Binary release
 CNTK v.1.1 binaries are on the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases/tag/v1.1)

-*2016-04-12.* CNTK is available as [Azure Virtual Machines](https://github.com/Microsoft/CNTK/wiki/CNTK-on-Azure) and [Docker Containers](https://github.com/Microsoft/CNTK/wiki/CNTK-Docker-Containers)
-
 See [all news](https://github.com/Microsoft/CNTK/wiki/News).

 ## What is CNTK
--- a/Source/CNTK/BrainScript/BrainScriptEvaluator.cpp
+++ b/Source/CNTK/BrainScript/BrainScriptEvaluator.cpp
@ -874,27 +874,48 @@ public:
    {
        let &config = *configp;
        double &us = *this; // we write to this
-        let arg = config[L"arg"];
        let whatArg = config[L"what"];
        wstring what = whatArg;
-        if (what == L"Floor")
-            us = floor((double) arg);
-        else if (what == L"Length")
+        if (what == L"Floor" || what == L"Length") // one-arg functions
        {
-            if (arg.Is<String>())
-                us = (double) ((wstring &) arg).size();
-            else // otherwise expect an array
+            let arg = config[L"arg"];
+            if (what == L"Floor")
            {
-                let & arr = arg.AsRef<ConfigArray>();
-                let range = arr.GetIndexRange();
-                us = (double) (range.second + 1 - range.first);
+                us = floor((double)arg);
            }
+            else if (what == L"Length")
+            {
+                if (arg.Is<String>())
+                    us = (double)((wstring &)arg).size();
+                else // otherwise expect an array
+                {
+                    let & arr = arg.AsRef<ConfigArray>();
+                    let range = arr.GetIndexRange();
+                    us = (double)(range.second + 1 - range.first);
+                }
+            }
+        }
+        else if (what == L"Mod" || what == L"IntDiv")  // two-arg int functions; operands arrive as the array args = (x : y)
+        {
+            let argsArg = config[L"args"];
+            let& args = argsArg.AsRef<ConfigArray>();
+            auto range = args.GetIndexRange();
+            if (range.second != range.first + 1) // exactly two elements: index range spans first..first+1
+                argsArg.Fail(L"Mod/IntDiv expects two arguments");
+            let arg1 = (int)args.At(range.first);  // dividend, converted to int
+            let arg2 = (int)args.At(range.second); // divisor, converted to int
+            if (arg2 == 0) argsArg.Fail(L"Mod/IntDiv: second argument (divisor) must not be zero"); // integer % and / by zero are undefined behavior in C++
+            if (what == L"Mod")
+                us = (int)(arg1 % arg2); // remainder, sign follows C++ integer % semantics
+            else if (what == L"IntDiv")
+                us = (int)(arg1 / arg2); // quotient truncated toward zero (C++ integer division)
+        }
        else
            whatArg.Fail(L"Unknown 'what' value to NumericFunction: " + what);
    }
 };

+
 // CompareFunctions
 //  - IsSameObject()
 class CompareFunction : public BoxOf<Bool>
--- a/Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
+++ b/Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
@ -21,6 +21,9 @@ Min(a,b) = if a < b then a else b
 Max(a,b) = if a > b then a else b 
 Fac(n) = if n > 1 then Fac(n-1) * n else 1 
 IsSameObject(a,b) = new CompareFunction [ what = 'IsSameObject' ; args = (a : b) ]
+Mod(x, y)  = new NumericFunction [ what = 'Mod' ;  args = (x:y) ] 
+IntDiv(x, y)  = new NumericFunction [ what = 'IntDiv' ;  args = (x:y) ] 
+

 ##############################################################################
 # comparison functions
@ -69,6 +72,18 @@ CNTK2 = [
    // 3. Shape operations
    // Changes: NewReshape -> Reshape, input -> _, dims -> shape
    Reshape(_, shape, beginAxis=0, endAxis=0, tag='') = new ComputationNode [ operation = 'Reshape' ; inputs = _ ; shape = new TensorShape [ /*shape*/ ] /*plus the function args*/ ]
+    Slice(_, beginIndex, endIndex, axis=1, tag='') =
+        if axis < 0 then [ # time axis: specify -1
+            beginFlags = if beginIndex > 0 then BS.Boolean.Not (BS.Loop.IsFirstN (beginIndex, _)) else                 BS.Loop.IsLastN  (-beginIndex, _)
+            endFlags   = if endIndex   > 0 then                 BS.Loop.IsFirstN (endIndex,   _)  else BS.Boolean.Not (BS.Loop.IsLastN  (-endIndex,   _))
+            flags = if      beginIndex == 0 then endFlags
+                    else if endIndex   == 0 then beginFlags
+                    else                         BS.Boolean.And (beginFlags, endFlags)
+            out = if beginIndex == 0 && endIndex == 0
+                  then _
+                  else BS.Sequences.Gather (flags, _)
+        ].out
+        else new ComputationNode [ operation = 'Slice' ; inputs = _ /*plus the function args*/ ] # non-time axis

    // 4. Tensor operations
    // Changes: Matrix -> Tensor. A -> x, B -> y. Data must come on y ("default parameter") hence not using _
@ -265,7 +280,7 @@ SumElements(matrix, tag='') = new ComputationNode [ operation = 'SumElements' ;
 # ^^ TODO: Rename to ReduceSumMB?
 Tanh(z, tag='') = new ComputationNode [ operation = 'Tanh' ; inputs = z /*plus the function args*/ ]
 TimeReverse(vectorSequence, tag='') = new ComputationNode [ operation = 'TimeReverse' ; inputs = vectorSequence /*plus the function args*/ ]
-Trace (node, say='', logFrequency=traceFrequency, logFirst=10, logGradientToo=false, onlyUpToRow=100000000, onlyUpToT=100000000, format=[], tag='') = new ComputationNode [ operation = 'Trace' ; inputs = node ]
+Trace (node, say='', logFrequency=100, logFirst=10, logGradientToo=false, onlyUpToRow=100000000, onlyUpToT=100000000, format=[], tag='') = new ComputationNode [ operation = 'Trace' ; inputs = node ]
 TransposeTimes(leftMatrix, rightMatrix, tag='') = new ComputationNode [ operation = 'TransposeTimes' ; inputs = (leftMatrix : rightMatrix) /*plus the function args*/ ]
 Where(cond, tag='') = new ComputationNode [ operation = 'Where' ; inputs = cond /*plus the function args*/ ]

--- a/Source/Common/Include/Eval.h
+++ b/Source/Common/Include/Eval.h
@ -28,6 +28,32 @@

 namespace Microsoft { namespace MSR { namespace CNTK {

+template <typename ElemType>
+class IEvaluateModelBase 
+{
+public:
+    // 
+    // Load a model based on configuration. The syntax is the same as when calling the cntk executable.
+    // e.g. "modelFile=model.dat deviceId=0".
+    // numCPUThreads can be used to set the thread count of BLAS.
+    // 
+    virtual void Init(const std::string& config) = 0;
+
+    //
+    // Create a network based on an (NDL) network description.
+    //
+    virtual void CreateNetwork(const std::string& networkDescription) = 0;
+
+    //
+    // Free resources
+    //
+    virtual void Destroy() = 0;
+};
+
+// ------------------------------------------------------------------------
+// Basic (legacy) interface
+// ------------------------------------------------------------------------
+
 enum NodeGroup
 {
    nodeInput,  // an input node
@ -39,33 +65,54 @@ enum NodeGroup
 // NOTICE: This interface is a public interface for evaluating models in CNTK. 
 //         Changes to this interface may affect other projects, such as Argon and LatGen,
 //         and therefore need to be communicated with such groups.
-template <class ElemType>
-class IEvaluateModel // Evaluate Model Interface
+template <typename ElemType>
+class IEvaluateModel : public IEvaluateModelBase<ElemType> // Evaluate Model Interface
 {
 public:
-    virtual void Init(const std::string& config) = 0;
-    virtual void Destroy() = 0;
-
-    virtual void CreateNetwork(const std::string& networkDescription) = 0;
+    //
+    // Retrieves the (flattened) dimensions 
+    //
    virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup) = 0;
+
+    //
+    // Allocate resources for a particular output.
+    //
    virtual void StartEvaluateMinibatchLoop(const std::wstring& outputNodeName) = 0;
+    
+    //
+    // Evaluate a model in frame mode. This does not support dynamic axes or sparse input data.
+    // Given a feature vector of dimension d, the inputs may contain n * d elements. The output will then be computed 
+    // for n samples.
+    // inputs - map from node name to array of input tensors, flattened to vector
+    // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will
+    // happen during evaluation
+    // 
    virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs) = 0;
+
+    //
+    // Evaluate - Evaluate using the network without input and provide the outputs
+    // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will 
+    // happen during evaluation
+    //
    virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs) = 0;
+
    virtual void ResetState() = 0;
 };

+
 // GetEval - get a evaluator type from the DLL
 // since we have 2 evaluator types based on template parameters, exposes 2 exports
 // could be done directly with the templated name, but that requires mangled C++ names
-template <class ElemType>
+template <typename ElemType>
 void EVAL_API GetEval(IEvaluateModel<ElemType>** peval);
 extern "C" EVAL_API void GetEvalF(IEvaluateModel<float>** peval);
 extern "C" EVAL_API void GetEvalD(IEvaluateModel<double>** peval);

+
 // Data Reader class
 // interface for clients of the Data Reader
 // mirrors the IEvaluateModel interface, except the Init method is private (use the constructor)
-template <class ElemType>
+template <typename ElemType>
 class Eval : public IEvaluateModel<ElemType>, protected Plugin
 {
 private:
@ -84,6 +131,7 @@ public:
    // modelPath=c:\models\model.dnn (model path, if not specified, must call LoadModel() method before Evaluate()
    // minibatchSize=1024 (minibatch size used during evaluation if < passed data size)
    Eval(const std::string& config);
+
    virtual ~Eval();

    // CreateNetwork - create a network based on the network description
@ -101,14 +149,146 @@ public:

    // Evaluate - Evaluate using the model with the given inputs and outputs
    // inputs - map from node name to input vector
-    // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
+    // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will 
+    // happen during evaluation
    virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs);

    // Evaluate - Evaluate using the network without input, and provide the outputs
-    // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
+    // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will 
+    // happen during evaluation
    virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs);

    virtual void Init(const std::string& config);
+
    virtual void ResetState();
 };
+
+
+// ------------------------------------------------------------------------
+// Extended interface
+// ------------------------------------------------------------------------
+
+//
+// A buffer to keep data for all samples in a (variable length) sequence 
+// from a single input or output.
+// This is used for both dense and sparse data.
+//
+template<typename ElemType>
+struct VariableBuffer
+{
+    //
+    // Number of samples held in this buffer. ForwardPass() uses it as the number of
+    // columns of the input matrix it fills.
+    //
+    size_t m_numberOfSamples = 0;
+
+    //
+    // All elements of a sequence, concatenated.
+    //
+    std::vector<ElemType> m_buffer;
+
+    // In case of sparse data, the following is also used. Otherwise, the 
+    // contents are ignored.
+
+    // E.g. a sequence of three sparse vectors with 2 / 4 / 2 non-zero values
+    // could be represented as the following:
+    // colIdx:  0   2       6   8
+    //          v   v       v   v
+    // indices  1 3 2 3 5 6 2 7
+    // buffer   0 1 2 3 4 5 6 7
+
+    //
+    // For every element in m_buffer, an entry in this array gives its position
+    // within its sample vector. For every vector the entries must be ascending.
+    //
+    std::vector<int> m_indices;
+
+    //
+    // Contains m_numberOfSamples + 1 indices into m_buffer. The first entry
+    // is always 0. The last entry points after the last element.
+    // See http://docs.nvidia.com/cuda/cusparse/#compressed-sparse-column-format-csc
+    //
+    std::vector<int> m_colIndices;
+};
+
+//
+// Meta data
+//
+struct VariableLayout
+{
+    // Element type of the values stored in the corresponding buffer.
+    enum DataType
+    {
+        Float32,
+        Float64
+    };
+
+    // How the values are laid out in the corresponding buffer.
+    enum StorageType
+    {
+        Undetermined,
+        Dense,
+        Sparse,
+    };
+
+    // Name of the input
+    std::wstring m_name;
+
+    // NOTE: m_dataType, m_storageType and m_numElements have no default initializer here; callers are
+    // expected to set all fields (e.g. through a braced initializer list).
+    DataType m_dataType;
+
+    StorageType m_storageType;
+
+    // Dimension of the tensor, flattened to 1 dimension, for one entry on the dynamic axis.
+    // E.g. for a tensor [2,3,*] this would be 6.
+    int m_numElements;
+
+    // Name of the axis, potentially shared between inputs. For any two inputs sharing the same
+    // dynamic axis, the sequence cardinality must be the same.
+    std::wstring m_dynamicAxisName;
+};
+
+template <typename ElemType>
+using Variables = std::vector<VariableBuffer<ElemType>>;
+
+using VariableSchema = std::vector<VariableLayout>;
+
+//
+// Extended interface, allowing for sparse input.
+//
+template <typename ElemType>
+class IEvaluateModelExtended : public IEvaluateModelBase<ElemType>
+{
+public:
+    //
+    // GetOutputSchema - retrieve information about tensor shapes and memory layout of the outputs for this
+    // model.
+    //
+    virtual VariableSchema GetOutputSchema() const = 0;
+
+    //
+    // StartForwardEvaluation - allocate internal state for calling ForwardPass(). The call restricts the
+    // network (inputs and outputs) to the functions represented by the given output names.
+    //
+    virtual void StartForwardEvaluation(std::vector<std::wstring> outputs) = 0;
+
+    //
+    // GetInputSchema - retrieve information about tensor shapes and memory layout of inputs necessary for a
+    // particular output. By default this returns all available inputs. After StartForwardEvaluation(), this
+    // returns all the inputs necessary to compute the outputs.
+    //
+    virtual VariableSchema GetInputSchema() const = 0;
+
+    //
+    // ForwardPass - evaluate (perform a forward pass for) a single unit using the model with the given inputs
+    // and outputs.
+    // The layout and shape of the data in the inputs vector must match the schema returned by GetInputSchema().
+    // This method is not reentrant, as the forward pass keeps internal state.
+    // inputs - vector of input buffers, one for every input as given by GetInputSchema()
+    // output - vector of output buffers, one for every output requested in StartForwardEvaluation(); the
+    // vector itself needs to be preallocated by the caller, sizing of each buffer will happen during evaluation.
+    // Called after StartForwardEvaluation()
+    //
+    virtual void ForwardPass(const Variables<ElemType>& inputs, Variables<ElemType>& output) = 0;
+};
+
+template <typename ElemType>
+void EVAL_API GetEvalExtended(IEvaluateModelExtended<ElemType>** peval);
+extern "C" EVAL_API void GetEvalExtendedF(IEvaluateModelExtended<float>** peval);
+extern "C" EVAL_API void GetEvalExtendedD(IEvaluateModelExtended<double>** peval);
+
 } } }
--- a/Source/ComputationNetworkLib/ComputationNetwork.cpp
+++ b/Source/ComputationNetworkLib/ComputationNetwork.cpp
@ -498,7 +498,7 @@ template <class ElemType>
        fprintf(stderr, "Setting dropout rate to %.8g.\n", dropoutRate);
        // TODO: Change this to use an interface that is independent of <ElemType>.
        if (dropoutNodes.size() == 0 && dropoutRate > 0)
-            fprintf(stderr, "WARNING: there is no dropout node.\n");
+            fprintf(stderr, "WARNING: Attempting to set dropout rate, but there is no dropout node in the network.\n");
    }

    // Each dropout node gets a distinct seed. The actual seed for each dropout node is computed as follows:
--- a/Source/ComputationNetworkLib/ComputationNetwork.h
+++ b/Source/ComputationNetworkLib/ComputationNetwork.h
@ -478,6 +478,47 @@ public:
        return std::vector<ComputationNodeBasePtr>{node};
    }

+    // Resolve a list of node names into the corresponding nodes.
+    // outputNodeNames - names of the requested nodes; if empty, the network's default output nodes
+    //                   (OutputNodes()) are returned instead.
+    // Returns the nodes in the order of the given names, one GetNodeFromName() lookup per name.
+    // Raises a RuntimeError when no names are given and the network has no default output node.
+    std::vector<ComputationNodeBasePtr> OutputNodesByName(const std::vector<std::wstring>& outputNodeNames)
+    {
+        if (outputNodeNames.empty())
+        {
+            const auto& defaultOutputs = OutputNodes();
+            if (defaultOutputs.empty())
+                RuntimeError("There is no default output node specified in the network.");
+
+            return defaultOutputs;
+        }
+
+        std::vector<ComputationNodeBasePtr> outputNodes;
+        outputNodes.reserve(outputNodeNames.size());
+        // range-for avoids comparing a signed index against the unsigned size()
+        for (const auto& name : outputNodeNames)
+            outputNodes.push_back(GetNodeFromName(name));
+
+        return outputNodes;
+    }
+
+    // Gather the distinct input nodes that the outputs named by outputNodeNames depend on
+    // (an empty list selects the default outputs, see OutputNodesByName()).
+    // Duplicates are eliminated through a std::set, so the returned vector follows the set's ordering
+    // of the node pointers rather than the order in which the inputs were encountered.
+    std::vector<ComputationNodeBasePtr> InputNodesForOutputs(const std::vector<std::wstring>& outputNodeNames)
+    {
+        auto roots = OutputNodesByName(outputNodeNames);
+
+        std::set<ComputationNodeBasePtr> uniqueInputs;
+        for (auto& root : roots)
+        {
+            const auto& dependencies = InputNodes(root);
+            uniqueInputs.insert(dependencies.begin(), dependencies.end());
+        }
+
+        return std::vector<ComputationNodeBasePtr>(uniqueInputs.begin(), uniqueInputs.end());
+    }
+
+
    // these are specified as such by the user
    const std::vector<ComputationNodeBasePtr>& FeatureNodes()        const { return m_featureNodes   ; }
    const std::vector<ComputationNodeBasePtr>& LabelNodes()          const { return m_labelNodes     ; }
--- a/Source/ComputationNetworkLib/ReshapingNodes.h
+++ b/Source/ComputationNetworkLib/ReshapingNodes.h
@ -656,6 +656,7 @@ public:
    WhereNode(DEVICEID_TYPE deviceId, const wstring& name) :
        Base(deviceId, name)
    {
+        MarkValueNonSharable();
    }

    virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override;
@ -696,6 +697,7 @@ public:
    PackedIndexNode(DEVICEID_TYPE deviceId, const wstring& name) :
        Base(deviceId, name)
    {
+        MarkValueNonSharable();
    }

    virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override;
--- a/Source/EvalDll/CNTKEval.cpp
+++ b/Source/EvalDll/CNTKEval.cpp
@ -18,6 +18,11 @@
 #endif
 #include "BestGpu.h"
 #include "MPIWrapper.h"
+#include "DataDeserializer.h"
+#include "SequencePacker.h"
+#include "NoRandomizer.h"
+#include "HeapMemoryProvider.h"
+#include "InputAndParamNodes.h"

 // TODO: Temporary mechanism to enable memory sharing for
 // node output value matrices. This will go away when the
@ -26,7 +31,50 @@ bool g_shareNodeValueMatrices = false;

 namespace Microsoft { namespace MSR { namespace CNTK {

-template <class ElemType>
+
+// Init - parse the configuration string and apply the global settings it carries
+// config - configuration text handed to m_config.Parse()
+// Reads "numCPUThreads" (default "1") and passes it to CPUMatrix<ElemType>::SetNumThreads(), then
+// reads "shareNodeValueMatrices" (default false) into the global g_shareNodeValueMatrices.
+template <typename ElemType>
+void CNTKEvalBase<ElemType>::Init(const std::string& config)
+{
+    m_config.Parse(config);
+    size_t nThreads = m_config("numCPUThreads", "1");
+    CPUMatrix<ElemType>::SetNumThreads(nThreads);
+    g_shareNodeValueMatrices = m_config(L"shareNodeValueMatrices", false);
+}
+
+
+// CreateNetwork - build the network held in m_net from a textual description
+// networkDescription - configuration text describing the network
+// Raises a LogicError when no network could be constructed.
+template <typename ElemType>
+void CNTKEvalBase<ElemType>::CreateNetwork(const std::string& networkDescription)
+{
+    ConfigParameters description;
+    description.Parse(networkDescription);
+
+    // handed to GetModelFromConfig(), but not used afterwards in this function
+    std::vector<wstring> unusedOutputNodeNames;
+    m_net = GetModelFromConfig<ConfigParameters, ElemType>(description, unusedOutputNodeNames);
+
+    if (!m_net)
+        LogicError("Unable to construct network from description");
+}
+
+
+// Destroy - release the network held by this object
+// NOTE: this base implementation only resets m_net and does not delete the object itself; the
+// Destroy() overrides of the derived classes in this file call it and then 'delete this'.
+template <typename ElemType>
+void CNTKEvalBase<ElemType>::Destroy()
+{
+    // cleanup everything
+    m_net.reset();
+}
+
+
+// ----------------------------------------------------------------------------
+// Basic interface
+// ----------------------------------------------------------------------------
+
+template <typename ElemType>
 void EVAL_API GetEval(IEvaluateModel<ElemType>** peval)
 {
    *peval = new CNTKEval<ElemType>();
@ -41,51 +89,11 @@ extern "C" EVAL_API void GetEvalD(IEvaluateModel<double>** peval)
    GetEval(peval);
 }

-template <class ElemType>
-void CNTKEval<ElemType>::Init(const std::string& config)
-{
-    m_start = 0;
-    m_config.Parse(config);
-    size_t nThreads = m_config("numCPUThreads", "1");
-    CPUMatrix<ElemType>::SetNumThreads(nThreads);
-
-    g_shareNodeValueMatrices = m_config(L"shareNodeValueMatrices", false);
-}
-
-// Destroy - cleanup and remove this class
-// NOTE: this destroys the object, and it can't be used past this point
-template <class ElemType>
-void CNTKEval<ElemType>::Destroy()
-{
-    // cleanup everything
-    m_net.reset();
-    delete m_reader;
-    delete m_writer;
-    delete this;
-}
-
-// CreateNetwork - create a network based on the network description
-// networkDescription - network description
-template <class ElemType>
-void CNTKEval<ElemType>::CreateNetwork(const std::string& networkDescription)
-{
-    ConfigParameters config;
-    config.Parse(networkDescription);
-
-    std::vector<wstring> outputNodeNames;
-    m_net = GetModelFromConfig<ConfigParameters, ElemType>(config, outputNodeNames);
-    
-    if (m_net == nullptr)
-    {
-        LogicError("Unable to construct network from description");
-    }
-}
-
 // GetNodeDimensions - Get the node dimensions of the specified nodes
 // dimensions - map from name of node to dimension of the node, will be appended to for Input/Output scenarios
 // nodeGroup - type of node we are requesting (input/output/specified)
 // NOTE: when nodeGroup==specified the dimensions map is expected to be populated with the string names of the nodes requested, dimensions will be modified return the current value.
-template <class ElemType>
+template <typename ElemType>
 void CNTKEval<ElemType>::GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup)
 {
    if (m_net == NULL)
@ -137,7 +145,7 @@ void CNTKEval<ElemType>::GetNodeDimensions(std::map<std::wstring, size_t>& dimen

 // StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls.
 // ouputNodeName - name of node that will be evaluated
-template <class ElemType>
+template <typename ElemType>
 void CNTKEval<ElemType>::StartEvaluateMinibatchLoop(const std::wstring& outputNodeName)
 {
    m_net->StartEvaluateMinibatchLoop(m_net->GetNodeFromName(outputNodeName));
@ -146,7 +154,7 @@ void CNTKEval<ElemType>::StartEvaluateMinibatchLoop(const std::wstring& outputNo
 // Evaluate - Evalute using the model with the given inputs and outputs
 // inputs - map from node name to input vector
 // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
-template <class ElemType>
+template <typename ElemType>
 void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs)
 {
    size_t minibatchSize = m_config(L"minibatchSize", (size_t) 10240);
@ -183,7 +191,7 @@ void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>

 // Evaluate - Evalute using the model with the given inputs and outputs
 // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
-template <class ElemType>
+template <typename ElemType>
 void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs)
 {
    // get the evaluation names from the output string
@ -206,14 +214,168 @@ void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>
    eval.WriteOutput(*m_writer, outNodeNames);
 }

-// ResetState - Reset the cell state when we get start of an utterance
-template <class ElemType>
-void CNTKEval<ElemType>::ResetState()
+
+template <typename ElemType>
+void CNTKEval<ElemType>::Destroy()
 {
-    m_start = 1 - m_start;
+    CNTKEvalBase<ElemType>::Destroy();
+    delete m_reader;
+    delete m_writer;
+    delete this;
 }

 // instantiate all the combinations we expect to be used
 template class CNTKEval<double>;
 template class CNTKEval<float>;
+
+// ----------------------------------------------------------------------------
+// Extended interface
+// ----------------------------------------------------------------------------
+
+// ToVariableLayout - describe a node's value as a VariableLayout
+// n - node to describe
+// Returns the node name, the element type (derived from sizeof(ElemType)), the storage type, the number
+// of elements of one sample and the name of the dynamic axis.
+// The storage type is Undetermined when the node's value is not available as a Matrix<ElemType>, or when
+// its matrix type is neither dense nor sparse. The axis name is empty for a node without an MBLayout.
+template<typename ElemType>
+VariableLayout CNTKEvalExtended<ElemType>::ToVariableLayout(const ComputationNodeBasePtr n) 
+{
+    auto matrix = dynamic_pointer_cast<Matrix<ElemType>>(n->ValuePtr());
+
+    VariableLayout::StorageType storageType = VariableLayout::Undetermined;
+    if (matrix)
+    {
+        if (matrix->GetMatrixType() == MatrixType::DENSE)
+            storageType = VariableLayout::Dense;
+        else if (matrix->GetMatrixType() == MatrixType::SPARSE)
+            storageType = VariableLayout::Sparse;
+    }
+
+    // A node may have no MBLayout (ForwardPass() checks for that on output nodes), so do not
+    // dereference the layout unconditionally.
+    auto pMBLayout = n->GetMBLayout();
+
+    return VariableLayout
+    {
+        /* name */ n->GetName(),
+        /* type */ sizeof(ElemType) == sizeof(float) ? VariableLayout::Float32 : VariableLayout::Float64,
+        /* storage */ storageType,
+        // explicit cast: an implicit narrowing conversion is not permitted inside a braced initializer
+        /* dimension */ (int) n->GetSampleLayout().GetNumElements(),
+        /* dynamic axis */ pMBLayout ? wstring(pMBLayout->GetAxisName()) : wstring()
+    };
+}
+
+
+// StartForwardEvaluation - prepare the network for subsequent ForwardPass() calls
+// outputNodeNames - names of the nodes to evaluate; an empty list selects the network's default output nodes
+template<typename ElemType>
+void CNTKEvalExtended<ElemType>::StartForwardEvaluation(std::vector<wstring> outputNodeNames)
+{
+    // kept in a member so that the scoped operation mode stays alive after this call returns
+    m_scopedNetworkOperationMode = make_shared<ScopedNetworkOperationMode>(m_net, NetworkOperationMode::inferring);
+    // resolve the requested outputs and the inputs they depend on
+    m_outputNodes  = m_net->OutputNodesByName(outputNodeNames);
+    m_inputNodes = m_net->InputNodesForOutputs(outputNodeNames);
+    // allocate memory for forward computation
+    m_net->AllocateAllMatrices({}, m_outputNodes, nullptr);
+    m_net->StartEvaluateMinibatchLoop(m_outputNodes);
+    // remember the input matrices that ForwardPass() fills from the caller's buffers
+    m_inputMatrices = DataReaderHelpers::RetrieveInputMatrices(m_inputNodes);
+} 
+
+// GetOutputSchema - describe every node of the network's OutputNodes() list, one VariableLayout per node,
+// in the order of that list.
+template<typename ElemType>
+VariableSchema CNTKEvalExtended<ElemType>::GetOutputSchema() const
+{
+    const auto& outputNodes = m_net->OutputNodes();
+
+    VariableSchema layouts;
+    layouts.reserve(outputNodes.size());
+    for (auto it = outputNodes.begin(); it != outputNodes.end(); ++it)
+        layouts.push_back(ToVariableLayout(*it));
+
+    return layouts;
+}
+
+// GetInputSchema - describe the inputs of the model, one VariableLayout per input node.
+// When input nodes have been recorded (see m_inputNodes), those are described; while none are recorded,
+// the inputs feeding the network's default outputs are described instead.
+template<typename ElemType>
+VariableSchema CNTKEvalExtended<ElemType>::GetInputSchema() const
+{
+    std::vector<ComputationNodeBasePtr> selected(m_inputNodes);
+    if (selected.empty())
+        selected = m_net->InputNodesForOutputs({}); // default to all nodes
+
+    VariableSchema layouts;
+    for (size_t k = 0; k < selected.size(); ++k)
+        layouts.push_back(ToVariableLayout(selected[k]));
+
+    return layouts;
+}
+
+// ForwardPass - copy the caller's input buffers into the network's input matrices, run forward propagation
+// for every node in m_outputNodes and copy each result into the caller's output buffers.
+// inputs - one buffer per entry of m_inputMatrices, consumed in that container's iteration order
+// output - one buffer per node of m_outputNodes, in that order; the m_buffer of each is resized to fit
+// Raises a RuntimeError on a wrong number of inputs, on too few output buffers, on input buffers that are
+// too small for the declared number of samples, on a value that is not a Matrix<ElemType>, and on an
+// output that holds more than one sequence.
+template<typename ElemType>
+void CNTKEvalExtended<ElemType>::ForwardPass(const Variables<ElemType>& inputs, Variables<ElemType>& output)
+{
+    const size_t numExpectedInputs = (size_t)std::distance(m_inputMatrices.begin(), m_inputMatrices.end());
+    if (inputs.size() != numExpectedInputs)
+    {
+        RuntimeError("Expected %d inputs, but got %d", (int)numExpectedInputs, (int)inputs.size());
+    }
+
+    // output[k] is written for every output node below, so those slots have to exist.
+    if (output.size() < m_outputNodes.size())
+    {
+        RuntimeError("Expected %d outputs, but got %d", (int)m_outputNodes.size(), (int)output.size());
+    }
+
+    int inputIndex = 0;
+    for (auto& input : m_inputMatrices)
+    {
+        // A copy (as before) keeps non-const access to the data for the matrix setters below.
+        // NOTE(review): this could become a const reference if those setters accept const pointers - confirm.
+        VariableBuffer<ElemType> buffer = inputs[inputIndex];
+        int numRows = input.second.sampleLayout.GetNumElements(); 
+        int numCols = buffer.m_numberOfSamples;
+        shared_ptr<Matrix<ElemType>> matrix = dynamic_pointer_cast<Matrix<ElemType>>(input.second.matrix);
+        if (!matrix)
+        {
+            RuntimeError("Input %d is not a matrix of the expected element type", inputIndex);
+        }
+        auto type = matrix->GetMatrixType(); 
+
+        input.second.pMBLayout->Init(1, numCols);
+        input.second.pMBLayout->AddSequence(0, 0, 0, numCols);
+       
+        if (type == MatrixType::DENSE)
+        {
+            // numRows * numCols values are consumed from the buffer
+            if (buffer.m_buffer.size() < (size_t)numRows * (size_t)numCols)
+            {
+                RuntimeError("Input %d: buffer holds %d values, but %d are required", inputIndex, (int)buffer.m_buffer.size(), numRows * numCols);
+            }
+            matrix->SetValue(numRows, numCols, matrix->GetDeviceId(), buffer.m_buffer.data(), matrixFlagNormal);
+        }
+        else if (type == MatrixType::SPARSE)
+        {
+            // In the sparse case the m_data layout is identical to CUDA's CSC layout
+            // (see http://docs.nvidia.com/cuda/cusparse/#compressed-sparse-column-format-csc).
+            // That layout needs numCols + 1 column offsets and one index per stored value.
+            if (buffer.m_colIndices.size() < (size_t)numCols + 1)
+            {
+                RuntimeError("Input %d: expected %d column indices, but got %d", inputIndex, numCols + 1, (int)buffer.m_colIndices.size());
+            }
+            if (buffer.m_indices.size() < buffer.m_buffer.size())
+            {
+                RuntimeError("Input %d: expected %d indices, but got %d", inputIndex, (int)buffer.m_buffer.size(), (int)buffer.m_indices.size());
+            }
+            matrix->SetMatrixFromCSCFormat(buffer.m_colIndices.data(), buffer.m_indices.data(), buffer.m_buffer.data(), buffer.m_buffer.size(), numRows, numCols);
+        }
+        // any other matrix type is left untouched, as before
+
+        ++inputIndex;
+    }
+
+    ComputationNetwork::BumpEvalTimeStamp(m_inputNodes);
+    
+    for (size_t outputIndex = 0; outputIndex < m_outputNodes.size(); ++outputIndex)
+    {
+        auto node = m_outputNodes[outputIndex];
+        m_net->ForwardProp(node);
+        shared_ptr<Matrix<ElemType>> outputMatrix = dynamic_pointer_cast<Matrix<ElemType>>(node->ValuePtr());
+        if (!outputMatrix)
+        {
+            RuntimeError("Output %d is not a matrix of the expected element type", (int)outputIndex);
+        }
+        auto pMBLayout = node->GetMBLayout();
+        if (!pMBLayout) 
+        {
+            pMBLayout = make_shared<MBLayout>();
+            pMBLayout->InitAsFrameMode(1); // treat this as if we have one single sample
+        }
+
+        const auto& seq = pMBLayout->GetAllSequences();
+        if (seq.size() != 1)
+        {
+            RuntimeError("Only 1 sequence supported by this API"); // TODO
+        }
+        std::vector<ElemType>& vec = output[outputIndex].m_buffer;
+        
+        // size the caller's buffer first, then let the matrix copy into it; both arguments are kept as
+        // plain non-const locals, exactly as the original call passed them
+        size_t numElements = outputMatrix->GetNumElements();
+        vec.resize(numElements);
+        ElemType* data = vec.data();
+        outputMatrix->CopyToArray(data, numElements);
+    }
+}
+
+// Destroy - release the network through the base class, then delete this object
+// NOTE: the object must not be used after this call
+template <typename ElemType>
+void CNTKEvalExtended<ElemType>::Destroy()
+{
+    CNTKEvalBase<ElemType>::Destroy();
+    delete this;
+}
+
+// GetEvalExtended - create a new CNTKEvalExtended<ElemType> and hand it out through peval
+// peval - receives the pointer to the newly allocated evaluator
+template <typename ElemType>
+void EVAL_API GetEvalExtended(IEvaluateModelExtended<ElemType>** peval)
+{
+    *peval = new CNTKEvalExtended<ElemType>();
+}
+
+// C-linkage exports forwarding to GetEvalExtended() for float and double
+extern "C" EVAL_API void  GetEvalExtendedF(IEvaluateModelExtended<float>** peval)
+{
+    GetEvalExtended(peval);
+}
+extern "C" EVAL_API void GetEvalExtendedD(IEvaluateModelExtended<double>** peval)
+{
+    GetEvalExtended(peval);
+}
+
+template class CNTKEvalExtended<double>;
+template class CNTKEvalExtended<float>;
 } } }
--- a/Source/EvalDll/CNTKEval.h
+++ b/Source/EvalDll/CNTKEval.h
@ -22,48 +22,97 @@

 namespace Microsoft { namespace MSR { namespace CNTK {

-template <class ElemType>
-class CNTKEval : public IEvaluateModel<ElemType>
+template <typename ElemType>
+class CNTKEvalBase : public IEvaluateModelBase<ElemType>
 {
+protected:
    typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
-    EvalReader<ElemType>* m_reader;
-    EvalWriter<ElemType>* m_writer;
    ConfigParameters m_config;
    ComputationNetworkPtr m_net;
-    std::map<std::wstring, size_t> m_dimensions;
-    size_t m_start;

-public:
    // constructor
-    CNTKEval()
-        : m_reader(nullptr), m_net(nullptr)
-    {
-    }
+    CNTKEvalBase() : m_net(nullptr) { }
+public:

    // CreateNetwork - create a network based on the network description
    // networkDescription - network description
    virtual void CreateNetwork(const std::string& networkDescription);
-
-    // GetNodeDimensions - Get the node dimensions of the specified nodes
-    // dimensions - map from name of node to dimension of the node
-    // nodeGroup - type of node we are requesting (input/output/specified)
-    virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup);
-
-    // StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls.
-    // ouputNodeName - name of node that will be evaluated
-    virtual void StartEvaluateMinibatchLoop(const std::wstring& outputNodeName);
-
-    // Evaluate - Evalute using the model with the given inputs and outputs
-    // inputs - map from node name to input vector
-    // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
-    virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs);
-
-    // Evaluate - Evalute using the model with the given inputs and outputs
-    // outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
-    virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs);
-
    virtual void Init(const std::string& config);
    virtual void Destroy();
-    virtual void ResetState();
+};
+
+// ------------------------------------------------------------------------
+// Basic interface
+// ------------------------------------------------------------------------
+// CNTKEval - implementation of the basic IEvaluateModel interface on top of CNTKEvalBase.
+// Holds the reader/writer pair that Destroy() deletes.
+template <typename ElemType>
+class CNTKEval : public CNTKEvalBase<ElemType>, public IEvaluateModel<ElemType>
+{
+    EvalReader<ElemType>* m_reader;
+    EvalWriter<ElemType>* m_writer;
+    std::map<std::wstring, size_t> m_dimensions;
+    size_t m_start; // flipped between 0 and 1 by ResetState()
+public:
+    // m_start is initialized here as well as in Init(), so that ResetState() never reads an
+    // indeterminate value when it is called before Init().
+    CNTKEval() : CNTKEvalBase<ElemType>(), m_reader(nullptr), m_writer(nullptr), m_start(0) {}
+
+    // GetNodeDimensions - get the dimensions of the nodes selected by nodeGroup
+    virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup);
+
+    // StartEvaluateMinibatchLoop - prepare the network for Evaluate() calls on the given output node
+    virtual void StartEvaluateMinibatchLoop(const std::wstring& outputNodeName);
+
+    // Evaluate - evaluate the model with the given inputs and outputs (maps from node name to vector)
+    virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs);
+
+    // Evaluate - evaluate the network without inputs and provide the outputs (map from node name to vector)
+    virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs);
+
+    // Destroy - release the network, the reader and the writer, then delete this object
+    virtual void Destroy() override;
+
+    // CreateNetwork - forwards to the shared base implementation
+    virtual void CreateNetwork(const std::string& networkDescription) override
+    {
+        CNTKEvalBase<ElemType>::CreateNetwork(networkDescription);
+    }
+    
+    // Init - run the shared base initialization and reset m_start to 0
+    virtual void Init(const std::string& config) override
+    {
+        CNTKEvalBase<ElemType>::Init(config);
+        m_start = 0;
+    }
+
+    // ResetState - flip m_start (0 <-> 1)
+    virtual void ResetState() override
+    {
+        m_start = 1 - m_start;
+    }
+};
+
+
+
+// ------------------------------------------------------------------------
+// Extended interface
+// ------------------------------------------------------------------------
+// CNTKEvalExtended - implementation of the IEvaluateModelExtended interface on top of CNTKEvalBase.
+template <typename ElemType>
+class CNTKEvalExtended : public CNTKEvalBase<ElemType>, public IEvaluateModelExtended<ElemType>
+{
+public:
+    // The interface overrides are public, matching CNTKEval; without an access specifier they would
+    // default to private in a 'class'.
+
+    // GetOutputSchema - layouts of the network's output nodes
+    virtual VariableSchema GetOutputSchema() const override;
+
+    // StartForwardEvaluation - prepare the network for ForwardPass() calls on the given outputs
+    virtual void StartForwardEvaluation(std::vector<wstring> outputs) override;
+
+    // GetInputSchema - layouts of the input nodes
+    virtual VariableSchema GetInputSchema() const override;
+
+    // ForwardPass - fill the inputs, run forward propagation and copy the results into output
+    virtual void ForwardPass(const Variables<ElemType>& inputs, Variables<ElemType>& output) override;
+
+    // Destroy - release the network, then delete this object
+    virtual void Destroy() override;
+
+    // CreateNetwork - forwards to the shared base implementation
+    virtual void CreateNetwork(const std::string& networkDescription) override
+    {
+        CNTKEvalBase<ElemType>::CreateNetwork(networkDescription);
+    }
+
+    // Init - forwards to the shared base implementation
+    virtual void Init(const std::string& config) override
+    {
+        CNTKEvalBase<ElemType>::Init(config);
+    }
+private:
+    static VariableLayout ToVariableLayout(const ComputationNodeBasePtr n);
+    std::vector<ComputationNodeBasePtr> m_outputNodes;  // nodes evaluated by ForwardPass()
+    std::shared_ptr<ScopedNetworkOperationMode> m_scopedNetworkOperationMode;
+    std::vector<ComputationNodeBasePtr> m_inputNodes;   // inputs the selected outputs depend on
+    StreamMinibatchInputs m_inputMatrices;              // input matrices filled by ForwardPass()
 };
 } } }
--- a/Source/EvalDll/EvalDll.vcxproj
+++ b/Source/EvalDll/EvalDll.vcxproj
@ -55,8 +55,8 @@
    <TargetName>EvalDll</TargetName>
  </PropertyGroup>
  <ItemDefinitionGroup>
-    <ClCompile>
-      <AdditionalIncludeDirectories>$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(SolutionDir)Source\ActionsLib;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
+      <ClCompile>
+      <AdditionalIncludeDirectories>$(SolutionDir)Source\Readers\ReaderLib;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(SolutionDir)Source\ActionsLib;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <AdditionalLibraryDirectories>$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\Math;$(MSMPI_LIB64);$(SolutionDir)$(Platform)\$(Configuration);$(NvmlLibPath)</AdditionalLibraryDirectories>
@ -99,7 +99,7 @@
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
-      <AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; Common.lib; ActionsLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; Common.lib; ActionsLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib;ReaderLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
      <Profile>true</Profile>
      <DelayLoadDLLs>Math.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
    </Link>
@ -153,4 +153,4 @@
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
-</Project>
+</Project>
--- a/Source/EvalDll/EvalDll.vcxproj.filters
+++ b/Source/EvalDll/EvalDll.vcxproj.filters
@ -2,39 +2,18 @@
 <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup>
    <ClCompile Include="CNTKEval.cpp" />
-    <ClCompile Include="..\Common\fileutil.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\Common\File.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\Common\TimerUtility.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
    <ClCompile Include="dllmain.cpp">
      <Filter>Misc</Filter>
    </ClCompile>
    <ClCompile Include="stdafx.cpp">
      <Filter>Misc</Filter>
    </ClCompile>
-    <ClCompile Include="..\Common\Config.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\Common\Eval.cpp">
-      <Filter>For External Use</Filter>
-    </ClCompile>
-    <ClCompile Include="..\Common\ExceptionWithCallStack.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
    <ClCompile Include="..\CNTK\BrainScript\BrainScriptEvaluator.cpp">
      <Filter>BrainScript</Filter>
    </ClCompile>
    <ClCompile Include="..\CNTK\BrainScript\BrainScriptParser.cpp">
      <Filter>BrainScript</Filter>
    </ClCompile>
-    <ClCompile Include="..\Common\DataReader.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="EvalReader.h" />
--- a/Source/Math/CPUMatrix.cpp
+++ b/Source/Math/CPUMatrix.cpp
@ -715,11 +715,12 @@ void CPUMatrix<ElemType>::SetValue(const ElemType v)
    }
    else
    {
-		ElemType* bufPtr = Data();
+        ElemType* bufPtr = Data();
        long m = (long) GetNumElements();
        // 2-way thread parallelism is sufficient for the memory bound
        // operation of just setting the values of an array.
        const unsigned SETVALUE_NUM_THREADS = 2;
+        UNUSED(SETVALUE_NUM_THREADS); // in case OMP is turned off.
 #pragma omp parallel for num_threads(SETVALUE_NUM_THREADS)
        // four-way unrolling
        for (long i = 0; i < (m & ~3); i += 4)
@ -1382,7 +1383,6 @@ void CPUMatrix<ElemType>::RequireSize(const size_t numRows, const size_t numCols
 // Resize() -- change matrix size
 // This function is cheap if the matrix size does not change.
 // Current content is not preserved.
-// BUGBUG: There is code that relies on zero initialization (without, we get subtle variations of output). That is wrong--we should initialize to QNaN and see where it fails.
 // If growOnly is true, resize will not reallocate memory if the current memory is large enough (i.e., will not shrink).
 // If this object does not own its memory then new memory cannot be allocated (one can still shrink and/or reshape).
 template <class ElemType>
@ -1411,8 +1411,9 @@ void CPUMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, boo
    }

    // success
-    m_numRows = numRows;
-    m_numCols = numCols;
+    m_sliceViewOffset = 0;
+    m_numRows         = numRows;
+    m_numCols         = numCols;
 }

 // allocated by the callee but should be deleted by the caller
--- a/Source/Math/CPUSparseMatrix.cpp
+++ b/Source/Math/CPUSparseMatrix.cpp
@ -1384,6 +1384,7 @@ template void CPUSparseMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
 template char* CPUSparseMatrix<char>::Data() const;
 template char* CPUSparseMatrix<char>::Data();
 template void CPUSparseMatrix<char>::Reset(void);
+template void CPUSparseMatrix<char>::Resize(const size_t, const size_t, const size_t, const bool);
 template void CPUSparseMatrix<char>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, bool);
 template void CPUSparseMatrix<char>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const MatrixFormat, const bool, bool);
 template CPUSparseMatrix<char>::~CPUSparseMatrix();
--- a/Source/Math/GPUMatrix.cu
+++ b/Source/Math/GPUMatrix.cu
@ -1467,29 +1467,27 @@ void GPUMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, boo
    if (GetNumRows() == numRows && GetNumCols() == numCols)
        return;

+    size_t numElements = numRows * numCols;
+    if (numElements > GetSizeAllocated() ||                 // grow allocation
+        (!growOnly && numElements != GetSizeAllocated()))   // shrink allocation if not growOnly
+    {
+        // reallocate buffer if numElements > 0
+        ElemType* pArray = nullptr;
+        if (numElements > 0)
+            pArray = TracingGPUMemoryAllocator::Allocate<ElemType>(GetComputeDeviceId(), numRows, numCols);
+
+        // If the buffer exists, free it
+        if (Buffer())
+            TracingGPUMemoryAllocator::Free<ElemType>(GetComputeDeviceId(), Buffer());
+
+        SetBuffer(pArray, numElements * sizeof(ElemType));
+        SetSizeAllocated(numElements);
+    }
+    
+    // success
+    m_sliceViewOffset = 0;
    m_numRows = numRows;
    m_numCols = numCols;
-
-    size_t numElements = GetNumElements();
-    if (numElements > GetSizeAllocated() || (!growOnly && numElements != GetSizeAllocated()))
-    {
-        if (IsEmpty())
-        {
-            SetSizeAllocated(0);
-            SetBuffer(nullptr, 0);
-        }
-        else
-        {
-            if (Buffer())
-            {
-                TracingGPUMemoryAllocator::Free<ElemType>(GetComputeDeviceId(), Buffer());
-            }
-            SetSizeAllocated(numElements);
-            SetBuffer(TracingGPUMemoryAllocator::Allocate<ElemType>(GetComputeDeviceId(), m_numRows, m_numCols), numElements * sizeof(ElemType));
-            CUDA_CALL(cudaMemset(Buffer(), 0, sizeof(ElemType) * GetSizeAllocated()));
-        }
-    }
-    m_sliceViewOffset = 0;
 }

 template <class ElemType>
--- a/Source/Math/Math.vcxproj.filters
+++ b/Source/Math/Math.vcxproj.filters
@ -2,12 +2,6 @@
 <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup>
    <ClCompile Include="Matrix.cpp" />
-    <ClCompile Include="..\Common\File.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\Common\fileutil.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
    <ClCompile Include="CPUMatrix.cpp">
      <Filter>CPU</Filter>
    </ClCompile>
--- a/Source/Math/Matrix.cpp
+++ b/Source/Math/Matrix.cpp
@ -1223,32 +1223,32 @@ void Matrix<ElemType>::AssignValuesOf(const Matrix<ElemType>& deepCopyFrom)
    DISPATCH_MATRIX_ON_FLAG(this, this,
        { 
            // Set CPUMatrix from:
-            DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
+            DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
                { m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
-                NOT_IMPLEMENTED,//{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
-                NOT_IMPLEMENTED,//{ m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
-                NOT_IMPLEMENTED);//{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
+                { LogicError("AssignValuesOf: Assigning a GPUMatrix to a CPUMatrix is not yet implemented."); },//{ m_CPUMatrix->SetValue(deepCopyFrom.m_GPUMatrix->GetNumRows(), deepCopyFrom.m_GPUMatrix->GetNumCols(), deepCopyFrom.m_GPUMatrix->CopyToArray()); },// //{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
+                { LogicError("AssignValuesOf: Assigning a CPUSparseMatrix to a CPUMatrix is not yet implemented."); },//{ m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
+                { LogicError("AssignValuesOf: Assigning a GPUSparseMatrix to a CPUMatrix is not yet implemented."); });//{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
        },
        { 
            // Set GPUMatrix from:
-            DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
-                NOT_IMPLEMENTED,//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
+            DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
+                { LogicError("AssignValuesOf: Assigning a CPUMatrix to a GPUMatrix is not yet implemented."); },//{ m_GPUMatrix->SetValue(deepCopyFrom.m_CPUMatrix->GetNumRows(), deepCopyFrom.m_CPUMatrix->GetNumCols(), m_GPUMatrix->GetComputeDeviceId(), deepCopyFrom.m_CPUMatrix->Data()); },
                { m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
-                NOT_IMPLEMENTED,//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
-                NOT_IMPLEMENTED);//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
+                { LogicError("AssignValuesOf: Assigning a CPUSparseMatrix to a GPUMatrix is not yet implemented."); },//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
+                { LogicError("AssignValuesOf: Assigning a GPUSparseMatrix to a GPUMatrix is not yet implemented."); });//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
        },
        { 
            // Set CPUSparseMatrix from:
-            DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
-                NOT_IMPLEMENTED,//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
-                NOT_IMPLEMENTED,//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
+            DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
+                { LogicError("AssignValuesOf: Assigning a CPUMatrix to a CPUSparseMatrix is not yet implemented."); },//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
+                { LogicError("AssignValuesOf: Assigning a GPUMatrix to a CPUSparseMatrix is not yet implemented."); },//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
                { m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
-                NOT_IMPLEMENTED);//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
+                { LogicError("AssignValuesOf: Assigning a GPUSparseMatrix to a CPUSparseMatrix is not yet implemented."); });//{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
        },
        { 
            // Set GPUSparseMatrix from:
-            DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
-                NOT_IMPLEMENTED,//{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
+            DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, nullptr,
+                { LogicError("AssignValuesOf: Assigning a CPUMatrix to a GPUSparseMatrix is not yet implemented."); },//{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
                { m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
                { m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
                { m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
@ -3610,13 +3610,17 @@ void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool
            if (!m_CPUSparseMatrix)
                LogicError("Can't move from CPU because I'm not there!");

-            if (!m_GPUSparseMatrix)
-                m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(to_id, m_CPUSparseMatrix->GetFormat());
-            else
-                m_GPUSparseMatrix->ChangeDeviceTo(to_id);
-
-            if (m_CPUSparseMatrix->GetNumElements() != 0 && !emptyTransfer)
+            if (emptyTransfer)
            {
+                if (m_GPUSparseMatrix && m_GPUSparseMatrix->GetComputeDeviceId() == to_id)
+                    m_GPUSparseMatrix->Resize(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount());
+                else
+                    m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(m_CPUSparseMatrix->GetNumRows(), m_CPUSparseMatrix->GetNumCols(), m_CPUSparseMatrix->NzCount(), to_id, m_CPUSparseMatrix->GetFormat());
+            }
+            else
+            {
+                if (!m_GPUSparseMatrix || m_GPUSparseMatrix->GetComputeDeviceId() != to_id)
+                    m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(to_id);
                m_GPUSparseMatrix->SetValue(*m_CPUSparseMatrix);
            }

@ -3640,10 +3644,10 @@ void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool
                if (!m_CPUSparseMatrix)
                    m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(m_GPUSparseMatrix->GetFormat());

-                if (m_GPUSparseMatrix->GetNumElements() != 0 && !emptyTransfer)
-                {
+                if (emptyTransfer)
+                    m_CPUSparseMatrix->Resize(m_GPUSparseMatrix->GetNumRows(), m_GPUSparseMatrix->GetNumCols(), m_GPUSparseMatrix->NzCount(), true);
+                else
                    m_GPUSparseMatrix->CopyToCPUSparseMatrix(*m_CPUSparseMatrix);
-                }

                if (isBeingMoved)
                {
@ -3668,13 +3672,19 @@ void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool
        {
            if (!m_CPUMatrix)
                LogicError("Can't move from CPU because I'm not there!");
-            if (m_CPUMatrix->GetNumElements() != 0 && !emptyTransfer)
+            if (emptyTransfer)
            {
-                m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Buffer(), matrixFlagNormal);
+                if (m_GPUMatrix && m_GPUMatrix->GetComputeDeviceId() == to_id)
+                    m_GPUMatrix->Resize(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols());
+                else
+                    m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id);
            }
            else
            {
-                m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(to_id);
+                if (m_GPUMatrix && m_GPUMatrix->GetComputeDeviceId() == to_id)
+                    m_GPUMatrix->SetValue(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data());
+                else
+                    m_GPUMatrix = make_shared<GPUMatrix<ElemType>>(m_CPUMatrix->GetNumRows(), m_CPUMatrix->GetNumCols(), to_id, m_CPUMatrix->Data());
            }
            if (isBeingMoved)
            {
@ -3682,9 +3692,7 @@ void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool
                m_CPUMatrix = nullptr;
            }
            else
-            {
                SetDataLocation(BOTH, DENSE);
-            }
        }
        else // from GPU
        {
@ -3693,15 +3701,22 @@ void Matrix<ElemType>::_transferFromDeviceToDevice(int from_id, int to_id, bool

            if (to_id < 0) // to CPU
            {
-                if (m_GPUMatrix->GetNumElements() != 0 && !emptyTransfer)
+                if (emptyTransfer)
                {
-                    ElemType* arr = m_GPUMatrix->CopyToArray(); // TODO: unnecessary allocation/copy; why not make this a vector that we move over as an rvalue ref?
-                    m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr, matrixFlagNormal);
-                    delete[] arr;
+                    if (m_CPUMatrix)
+                        m_CPUMatrix->Resize(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols());
+                    else
+                        m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols());
                }
                else
                {
-                    m_CPUMatrix = make_shared<CPUMatrix<ElemType>>();
+                    ElemType* arr = m_GPUMatrix->CopyToArray(); // TODO: unnecessary allocation/copy; why not make this a vector that we move over as an rvalue ref?
+                    if (m_CPUMatrix)
+                        m_CPUMatrix->SetValue(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr);
+                    else
+                        m_CPUMatrix = make_shared<CPUMatrix<ElemType>>(m_GPUMatrix->GetNumRows(), m_GPUMatrix->GetNumCols(), arr, matrixFlagNormal);
+
+                    delete[] arr;
                }

                if (isBeingMoved)
--- a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp
+++ b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.cpp
@ -7,13 +7,18 @@
 #include "CNTKTextFormatReader.h"
 #include "Config.h"
 #include "TextConfigHelper.h"
+#include "ChunkCache.h"
 #include "BlockRandomizer.h"
 #include "NoRandomizer.h"
 #include "TextParser.h"
 #include "SequencePacker.h"
+#include "FramePacker.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

+// TODO: This class should go away eventually.
+// TODO: The composition of packer + randomizer + different deserializers in a generic manner is done in the CompositeDataReader.
+// TODO: Currently preserving this for backward compatibility with current configs.
 CNTKTextFormatReader::CNTKTextFormatReader(MemoryProviderPtr provider,
    const ConfigParameters& config) :
    m_provider(provider)
@ -31,28 +36,37 @@ CNTKTextFormatReader::CNTKTextFormatReader(MemoryProviderPtr provider,
            m_deserializer = shared_ptr<IDataDeserializer>(new TextParser<double>(configHelper));
        }

-        TransformerPtr randomizer;
+        if (configHelper.ShouldKeepDataInMemory()) 
+        {
+            m_deserializer = shared_ptr<IDataDeserializer>(new ChunkCache(m_deserializer));
+        }
+
        size_t window = configHelper.GetRandomizationWindow();
        if (window > 0)
        {
            // Verbosity is a general config parameter, not specific to the text format reader.
            int verbosity = config(L"verbosity", 2);
-            randomizer = make_shared<BlockRandomizer>(verbosity, window, m_deserializer);
+            m_randomizer = make_shared<BlockRandomizer>(verbosity, window, m_deserializer);
        }
        else
        {
-            randomizer = std::make_shared<NoRandomizer>(m_deserializer);
+            m_randomizer = std::make_shared<NoRandomizer>(m_deserializer);
        }

-        randomizer->Initialize(nullptr, config);
-
-        m_transformer = randomizer;
-
-        // TODO: add "frameMode"  config paramter
+        if (configHelper.IsInFrameMode()) 
+        {
+            m_packer = std::make_shared<FramePacker>(
+                m_provider,
+                m_randomizer,
+                GetStreamDescriptions());
+        }
+        else
+        {
        m_packer = std::make_shared<SequencePacker>(
            m_provider,
-            m_transformer,
+            m_randomizer,
            GetStreamDescriptions());
+        }
    }
    catch (const std::runtime_error& e)
    {
@ -72,7 +86,7 @@ void CNTKTextFormatReader::StartEpoch(const EpochConfiguration& config)
        RuntimeError("Epoch size cannot be 0.");
    }

-    m_transformer->StartEpoch(config);
+    m_randomizer->StartEpoch(config);
    m_packer->StartEpoch(config);
 }

--- a/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.h
+++ b/Source/Readers/CNTKTextFormatReader/CNTKTextFormatReader.h
@ -8,6 +8,7 @@
 #include "TextParser.h"
 #include "Reader.h"
 #include "Packer.h"
+#include "SequenceEnumerator.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -32,8 +33,8 @@ public:
 private:
    IDataDeserializerPtr m_deserializer;

-    // A head transformer in a list of transformers.
-    TransformerPtr m_transformer;
+    // Randomizer.
+    SequenceEnumeratorPtr m_randomizer;

    // Packer.
    PackerPtr m_packer;
--- a/Source/Readers/CNTKTextFormatReader/Descriptors.h
+++ b/Source/Readers/CNTKTextFormatReader/Descriptors.h
@ -56,6 +56,60 @@ namespace Microsoft { namespace MSR { namespace CNTK {

    // A collection of chunk descriptors, each containing
    // a collection of sequence descriptors for the corresponding
-    // chunk of the input data. 
-    typedef std::vector<ChunkDescriptor> Index;
+    // chunk of the input data.
+    // It also stores a mapping of keys into sequence descriptors.
+    struct Index
+    {
+        std::vector<ChunkDescriptor> m_chunks;                                  // chunks
+        std::map<size_t, std::pair<size_t, size_t>> m_keyToSequenceInChunk;     // sequence key -> sequence location in chunk
+        const size_t m_maxChunkSize;                                            // maximum chunk size in bytes
+
+        explicit Index(size_t chunkSize) : m_maxChunkSize(chunkSize)
+        {}
+
+        // Adds sequence (metadata) to the index. Additionally, it
+        // assigns an appropriate chunk id to the sequence descriptor,
+        // ensures that chunks do not exceed the maximum allowed size
+        // (except when a sequence size is greater than the maximum chunk size)
+        void AddSequence(SequenceDescriptor& sd)
+        {
+            assert(!m_chunks.empty());
+            ChunkDescriptor* chunk = &m_chunks.back();
+            if (chunk->m_byteSize > 0 && (chunk->m_byteSize + sd.m_byteSize) > m_maxChunkSize)
+            {
+                // Creating a new chunk if the size is exceeded.
+                m_chunks.push_back({});
+                chunk = &m_chunks.back();
+                chunk->m_id = m_chunks.size() - 1;
+            }
+
+            chunk->m_byteSize += sd.m_byteSize;
+            chunk->m_numberOfSequences++;
+            chunk->m_numberOfSamples += sd.m_numberOfSamples;
+            sd.m_chunkId = chunk->m_id;
+            sd.m_id = chunk->m_sequences.size();
+            auto location = std::make_pair(chunk->m_id, sd.m_id);
+            m_keyToSequenceInChunk.insert(std::make_pair(sd.m_key.m_sequence, location));
+            chunk->m_sequences.push_back(sd);
+        }
+
+        // Reserves chunk storage for an input of sizeInBytes bytes and appends the initial (empty) chunk.
+        void Reserve(size_t sizeInBytes)
+        {
+            if (m_maxChunkSize > 0)
+            {
+                m_chunks.reserve((sizeInBytes + m_maxChunkSize - 1) / m_maxChunkSize);
+            }
+
+            m_chunks.push_back({});
+        }
+
+        // Checks if the index is empty.
+        bool IsEmpty() const
+        {
+            return m_chunks.empty();
+        }
+
+        DISABLE_COPY_AND_MOVE(Index);
+    };
 }}}
--- a/Source/Readers/CNTKTextFormatReader/Exports.cpp
+++ b/Source/Readers/CNTKTextFormatReader/Exports.cpp
@ -11,7 +11,7 @@
 #include "ReaderShim.h"
 #include "CNTKTextFormatReader.h"
 #include "HeapMemoryProvider.h"
-#include "CudaMemoryProvider.h"
+#include "StringUtil.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -32,4 +32,30 @@ extern "C" DATAREADER_API void GetReaderD(IDataReader** preader)
    *preader = new ReaderShim<double>(factory);
 }

+// A factory method for creating text deserializers. The "precision" config value ("float" by default)
+// is matched case-insensitively. TODO: Not safe from the ABI perspective; the interface will be reworked to be ABI-safe.
+extern "C" DATAREADER_API bool CreateDeserializer(IDataDeserializer** deserializer, const std::wstring& type, const ConfigParameters& deserializerConfig, CorpusDescriptorPtr corpus, bool)
+{
+    string precision = deserializerConfig.Find("precision", "float");
+    if (!AreEqualIgnoreCase(precision, "float") && !AreEqualIgnoreCase(precision, "double"))
+    {
+        InvalidArgument("Unsupported precision '%s'", precision.c_str());
+    }
+
+    // TODO: Remove type from the parser. Current implementation does not support streams of different types.
+    if (type == L"CNTKTextFormatDeserializer")
+    {
+        if (AreEqualIgnoreCase(precision, "float")) // same case-insensitive match as the validation above
+            *deserializer = new TextParser<float>(corpus, TextConfigHelper(deserializerConfig));
+        else // double
+            *deserializer = new TextParser<double>(corpus, TextConfigHelper(deserializerConfig));
+    }
+    else
+        InvalidArgument("Unknown deserializer type '%ls'", type.c_str());
+
+    // Deserializer created.
+    return true;
+}
+
+
 }}}
--- a/Source/Readers/CNTKTextFormatReader/Indexer.cpp
+++ b/Source/Readers/CNTKTextFormatReader/Indexer.cpp
@ -23,7 +23,7 @@ Indexer::Indexer(FILE* file, bool skipSequenceIds, size_t chunkSize) :
    m_pos(nullptr),
    m_done(false),
    m_hasSequenceIds(!skipSequenceIds),
-    m_maxChunkSize(chunkSize)
+    m_index(chunkSize)
 {
    if (m_file == nullptr)
    {
@ -53,24 +53,7 @@ void Indexer::RefillBuffer()
    }
 }

-void Indexer::AddSequence(SequenceDescriptor& sd)
-{
-    assert(!m_chunks.empty());
-    ChunkDescriptor* chunk = &m_chunks.back();
-    if (chunk->m_byteSize > 0 && (chunk->m_byteSize + sd.m_byteSize) > m_maxChunkSize)
-    {
-        m_chunks.push_back({});
-        chunk = &m_chunks.back();
-        chunk->m_id = m_chunks.size() - 1;
-    }
-    chunk->m_byteSize += sd.m_byteSize;
-    chunk->m_numberOfSequences++;
-    chunk->m_numberOfSamples += sd.m_numberOfSamples;
-    sd.m_chunkId = chunk->m_id;
-    chunk->m_sequences.push_back(sd);
-}
-
-void Indexer::BuildFromLines()
+void Indexer::BuildFromLines(CorpusDescriptorPtr corpus)
 {
    assert(m_pos == m_bufferStart);
    m_hasSequenceIds = false;
@ -82,13 +65,12 @@ void Indexer::BuildFromLines()
        if (m_pos)
        {
            SequenceDescriptor sd = {};
-            sd.m_id = lines;
            sd.m_numberOfSamples = 1;
            sd.m_isValid = true;
            sd.m_fileOffsetBytes = offset;
            offset = GetFileOffset() + 1;
            sd.m_byteSize = offset - sd.m_fileOffsetBytes;
-            AddSequence(sd);
+            AddSequenceIfIncluded(corpus, lines, sd);
            ++m_pos;
            ++lines;
        }
@ -103,30 +85,22 @@ void Indexer::BuildFromLines()
        // There's a number of characters, not terminated by a newline,
        // add a sequence to the index, parser will have to deal with it.
        SequenceDescriptor sd = {};
-        sd.m_id = lines;
        sd.m_numberOfSamples = 1;
        sd.m_isValid = true;
        sd.m_fileOffsetBytes = offset;
        sd.m_byteSize = m_fileOffsetEnd - sd.m_fileOffsetBytes;
-        AddSequence(sd);
+        AddSequenceIfIncluded(corpus, lines, sd);
    }
-
 }

-void Indexer::Build()
+void Indexer::Build(CorpusDescriptorPtr corpus)
 {
-    if (!m_chunks.empty())
+    if (!m_index.IsEmpty())
    {
        return;
    }

-    if (m_maxChunkSize > 0)
-    {
-        auto fileSize = filesize(m_file);
-        m_chunks.reserve((fileSize + m_maxChunkSize - 1) / m_maxChunkSize);
-    }
-
-    m_chunks.push_back({});
+    m_index.Reserve(filesize(m_file));

    RefillBuffer(); // read the first block of data
    if (m_done)
@ -147,7 +121,7 @@ void Indexer::Build()
    if (!m_hasSequenceIds || m_bufferStart[0] == NAME_PREFIX)
    {
        // skip sequence id parsing, treat lines as individual sequences
-        BuildFromLines();
+        BuildFromLines(corpus);
        return;
    }

@ -160,33 +134,45 @@ void Indexer::Build()
    }

    SequenceDescriptor sd = {};
-    sd.m_id = id;
    sd.m_fileOffsetBytes = offset;
    sd.m_isValid = true;

+    size_t currentKey = id;
    while (!m_done)
    {
        SkipLine(); // ignore whatever is left on this line.
        offset = GetFileOffset(); // a new line starts at this offset;
        sd.m_numberOfSamples++;

-        if (!m_done && TryGetSequenceId(id) && id != sd.m_id)
+        if (!m_done && TryGetSequenceId(id) && id != currentKey)
        {
            // found a new sequence, which starts at the [offset] bytes into the file
            sd.m_byteSize = offset - sd.m_fileOffsetBytes;
-            AddSequence(sd);
+            AddSequenceIfIncluded(corpus, currentKey, sd);
+
            sd = {};
-            sd.m_id = id;
            sd.m_fileOffsetBytes = offset;
            sd.m_isValid = true;
+            currentKey = id;
        }
    }

    // calculate the byte size for the last sequence
    sd.m_byteSize = m_fileOffsetEnd - sd.m_fileOffsetBytes;
-    AddSequence(sd);
+    AddSequenceIfIncluded(corpus, currentKey, sd);
 }

+void Indexer::AddSequenceIfIncluded(CorpusDescriptorPtr corpus, size_t sequenceKey, SequenceDescriptor& sd)
+{
+    auto& stringRegistry = corpus->GetStringRegistry();
+    auto key = msra::strfun::utf16(std::to_string(sequenceKey));
+    if (corpus->IsIncluded(key))
+    {
+        sd.m_key.m_sequence = stringRegistry[key];
+        sd.m_key.m_sample = 0;
+        m_index.AddSequence(sd);
+    }
+}

 void Indexer::SkipLine()
 {
--- a/Source/Readers/CNTKTextFormatReader/Indexer.h
+++ b/Source/Readers/CNTKTextFormatReader/Indexer.h
@ -8,6 +8,7 @@
 #include <stdint.h>
 #include <vector>
 #include "Descriptors.h"
+#include "CorpusDescriptor.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -23,10 +24,10 @@ public:

    // Reads the input file, building and index of chunks and corresponding
    // sequences.
-    void Build();
+    void Build(CorpusDescriptorPtr corpus);

    // Returns input data index (chunk and sequence metadata)
-    const Index& GetIndex() const { return m_chunks; }
+    const Index& GetIndex() const { return m_index; }

    // True, when input does not have the sequence id column
    // or when sequence id column was ignored during indexing
@ -49,15 +50,11 @@ private:
    bool m_hasSequenceIds; // true, when input contains one sequence per line 
                           // or when sequence id column was ignored during indexing.

-    const size_t m_maxChunkSize; // maximum permitted chunk size;
+    // a collection of chunk descriptors and sequence keys.
+    Index m_index;

-    std::vector<ChunkDescriptor> m_chunks; // a collection of chunk descriptors
-
-    // Adds sequence (metadata) to the index. Additionally, it
-    // assigns an appropriate chunk id to the sequence descriptor,
-    // ensures that chunks do not exceed the maximum allowed size
-    // (except when a sequence size is greater than the maximum chunk size)
-    void AddSequence(SequenceDescriptor& sd);
+    // Adds the sequence (metadata) to the index, but only if its key is included in the corpus descriptor.
+    void AddSequenceIfIncluded(CorpusDescriptorPtr corpus, size_t sequenceKey, SequenceDescriptor& sd);

    // fills up the buffer with data from file, all previously buffered data
    // will be overwritten.
@ -76,7 +73,7 @@ private:
    // Build a chunk/sequence index, treating each line as an individual sequence.
    // Does not do any sequence parsing, instead uses line number as 
    // the corresponding sequence id.
-    void BuildFromLines();
+    void BuildFromLines(CorpusDescriptorPtr corpus);

    // Returns current offset in the input file (in bytes). 
    int64_t GetFileOffset() const { return m_fileOffsetStart + (m_pos - m_bufferStart); }
--- a/Source/Readers/CNTKTextFormatReader/TextConfigHelper.cpp
+++ b/Source/Readers/CNTKTextFormatReader/TextConfigHelper.cpp
@ -4,6 +4,7 @@
 //

 #include "stdafx.h"
+#define __STDC_FORMAT_MACROS
 #include <inttypes.h>
 #include <limits>
 #include "TextConfigHelper.h"
@ -116,32 +117,36 @@ TextConfigHelper::TextConfigHelper(const ConfigParameters& config)

    m_filepath = msra::strfun::utf16(config(L"file"));

-    if (config.Exists(L"randomize"))
+    // EvalActions inserts randomize = "none" into the reader config in DoWriteOutput.
+    wstring randomizeString = config(L"randomize", wstring());
+    if (!_wcsicmp(randomizeString.c_str(), L"none"))
    {
-        wstring randomizeString = config.CanBeString(L"randomize") ? config(L"randomize") : wstring();
-        if (!_wcsicmp(randomizeString.c_str(), L"none"))
-        {
-            m_randomizationWindow = randomizeNone;
-        }
-        else if (!_wcsicmp(randomizeString.c_str(), L"auto"))
-        {
-            m_randomizationWindow = randomizeAuto;
-        }
-        else
-        {
-            m_randomizationWindow = config(L"randomize");
-        }
+        m_randomizationWindow = randomizeNone;
    }
    else
    {
-        m_randomizationWindow = randomizeAuto;
+        bool randomize = config(L"randomize", true);
+
+        if (!randomize)
+        {
+            m_randomizationWindow = randomizeNone;
+        }
+        else if (config.Exists(L"randomizationWindow"))
+        {
+            m_randomizationWindow = config(L"randomizationWindow");
+        }
+        else
+        {
+            m_randomizationWindow = randomizeAuto;
+        }
    }

    m_skipSequenceIds = config(L"skipSequenceIds", false);
    m_maxErrors = config(L"maxErrors", 0);
-    m_traceLevel = config(L"traceLevel", 0);
+    m_traceLevel = config(L"traceLevel", 1);
    m_chunkSizeBytes = config(L"chunkSizeInBytes", 32 * 1024 * 1024); // 32 MB by default
-    m_chunkCacheSize = config(L"numChunksToCache", 32); // 32 * 32 MB = 1 GB of memory in total
+    m_keepDataInMemory = config(L"keepDataInMemory", false);
+    m_frameMode = config(L"frameMode", false);
 }

 }}}
--- a/Source/Readers/CNTKTextFormatReader/TextConfigHelper.h
+++ b/Source/Readers/CNTKTextFormatReader/TextConfigHelper.h
@ -35,7 +35,9 @@ public:

    size_t GetChunkSize() const { return m_chunkSizeBytes; }

-    unsigned int GetNumChunksToCache() const { return m_chunkCacheSize; }
+    bool ShouldKeepDataInMemory() const { return m_keepDataInMemory; }
+
+    bool IsInFrameMode() const { return m_frameMode; }

    ElementType GetElementType() const { return m_elementType; }

@ -50,7 +52,8 @@ private:
    unsigned int m_maxErrors;
    unsigned int m_traceLevel;
    size_t m_chunkSizeBytes; // chunks size in bytes
-    unsigned int m_chunkCacheSize; // number of chunks to keep in the memory
+    bool m_keepDataInMemory; // if true the whole dataset is kept in memory
+    bool m_frameMode; // if true, the maximum expected sequence length in the dataset is one sample.
 };

 } } }
--- a/Source/Readers/CNTKTextFormatReader/TextParser.cpp
+++ b/Source/Readers/CNTKTextFormatReader/TextParser.cpp
@ -16,12 +16,6 @@

 namespace Microsoft { namespace MSR { namespace CNTK {

-inline bool isDelimiter(char c)
-{
-    return c == VALUE_DELIMITER || c == NAME_PREFIX || c == COLUMN_DELIMITER ||
-        c == INDEX_DELIMITER || c == ROW_DELIMITER || c == CARRIAGE_RETURN;
-}
-
 enum State
 {
    Init = 0,
@ -48,10 +42,6 @@ public:

    // chunk id (copied from the descriptor)
    size_t m_id;
-    // Keeps track of how many times GetSequence was called.
-    // When this counter value reaches the number of sequences in 
-    // the this chunk, it can be safely unloaded.
-    size_t m_sequenceRequestCount;

    // a non-owned pointer to the parser that created this chunk
    TextParser* m_parser;
@ -66,20 +56,24 @@ struct TextParser<ElemType>::StreamInfo
 };

 template <class ElemType>
-TextParser<ElemType>::TextParser(const TextConfigHelper& helper) :
-TextParser(helper.GetFilePath(), helper.GetStreams())
+TextParser<ElemType>::TextParser(const TextConfigHelper& helper) : TextParser(std::make_shared<CorpusDescriptor>(), helper)
+{}
+
+template <class ElemType>
+TextParser<ElemType>::TextParser(CorpusDescriptorPtr corpus, const TextConfigHelper& helper) :
+TextParser(corpus, helper.GetFilePath(), helper.GetStreams())
 {
    SetTraceLevel(helper.GetTraceLevel());
    SetMaxAllowedErrors(helper.GetMaxAllowedErrors());
-    SetChunkCacheSize(helper.GetNumChunksToCache());
    SetChunkSize(helper.GetChunkSize());
    SetSkipSequenceIds(helper.ShouldSkipSequenceIds());

    Initialize();
 }

+
 template <class ElemType>
-TextParser<ElemType>::TextParser(const std::wstring& filename, const vector<StreamDescriptor>& streams) : 
+TextParser<ElemType>::TextParser(CorpusDescriptorPtr corpus, const std::wstring& filename, const vector<StreamDescriptor>& streams) :
    m_filename(filename),
    m_file(nullptr),
    m_streamInfos(streams.size()),
@ -91,12 +85,12 @@ TextParser<ElemType>::TextParser(const std::wstring& filename, const vector<Stre
    m_bufferEnd(nullptr),
    m_pos(nullptr),
    m_chunkSizeBytes(0),
-    m_chunkCacheSize(0),
    m_traceLevel(TraceLevel::Error),
    m_hadWarnings(false),
    m_numAllowedErrors(0),
    m_skipSequenceIds(false),
-    m_numRetries(5)
+    m_numRetries(5),
+    m_corpus(corpus)
 {
    assert(streams.size() > 0);

@ -154,25 +148,29 @@ void TextParser<ElemType>::Initialize()

    attempt(m_numRetries, [this]()
    {
-        m_file = fopenOrDie(m_filename, L"rbS");
+        if (m_file == nullptr)
+        {
+            m_file = fopenOrDie(m_filename, L"rbS");
+        }
+        else if (ferror(m_file) != 0)
+        {
+            fclose(m_file);
+            m_file = fopenOrDie(m_filename, L"rbS");
+        }
+        
+        if (funicode(m_file))
+        {
+            // Retrying won't help here, the file is UTF-16 encoded.
+            m_numRetries = 0;
+            RuntimeError("Found a UTF-16 BOM at the beginning of the input file (%ls). "
+                "UTF-16 encoding is currently not supported.", m_filename.c_str());
+        }
+
+        m_indexer = make_unique<Indexer>(m_file, m_skipSequenceIds, m_chunkSizeBytes);
+
+        m_indexer->Build(m_corpus);
    });

-    if (funicode(m_file))
-    {
-        RuntimeError("Found a UTF-16 BOM at the beginning of the input file (%ls). "
-            "UTF-16 encoding is currently not supported.", m_filename.c_str());
-    }
-
-    m_indexer = make_unique<Indexer>(m_file, m_skipSequenceIds, m_chunkSizeBytes);
-
-    attempt(m_numRetries, [this]()
-    {
-        m_indexer->Build();
-    });
-
-    // it's still possible that the actual input data does not have sequence id column.
-    m_skipSequenceIds = !m_indexer->HasSequenceIds();
-
    assert(m_indexer != nullptr);

    int64_t position = _ftelli64(m_file);
@ -193,8 +191,8 @@ ChunkDescriptions TextParser<ElemType>::GetChunkDescriptions()
    const auto& index = m_indexer->GetIndex();

    ChunkDescriptions result;
-    result.reserve(index.size());
-    for (auto const& chunk : index)
+    result.reserve(index.m_chunks.size());
+    for (auto const& chunk : index.m_chunks)
    {
        result.push_back(shared_ptr<ChunkDescription>(
            new ChunkDescription {
@ -211,7 +209,7 @@ template <class ElemType>
 void TextParser<ElemType>::GetSequencesForChunk(size_t chunkId, std::vector<SequenceDescription>& result)
 {
    const auto& index = m_indexer->GetIndex();
-    const auto& chunk = index[chunkId];
+    const auto& chunk = index.m_chunks[chunkId];
    result.reserve(chunk.m_sequences.size());

    for (auto const& s : chunk.m_sequences)
@ -232,7 +230,6 @@ TextParser<ElemType>::TextDataChunk::TextDataChunk(const ChunkDescriptor& descri
    m_parser(parser)
 {
    m_id = descriptor.m_id;
-    m_sequenceRequestCount = 0;
 }

 template <class ElemType>
@ -240,7 +237,6 @@ void TextParser<ElemType>::TextDataChunk::GetSequence(size_t sequenceId, std::ve
 {
    auto it = m_sequenceMap.find(sequenceId);
    assert(it != m_sequenceMap.end());
-    ++m_sequenceRequestCount;
    result.reserve(m_parser->m_streamInfos.size());
    const auto& sequenceData = it->second;
    for (size_t j = 0; j < m_parser->m_streamInfos.size(); ++j)
@ -278,50 +274,20 @@ void TextParser<ElemType>::TextDataChunk::GetSequence(size_t sequenceId, std::ve
 template <class ElemType>
 ChunkPtr TextParser<ElemType>::GetChunk(size_t chunkId)
 {
-    ChunkPtr chunk;
-    auto it = m_chunkCache.find(chunkId);
-    if (it != m_chunkCache.end())
+    const auto& chunkDescriptor = m_indexer->GetIndex().m_chunks[chunkId];
+    auto textChunk = make_shared<TextDataChunk>(chunkDescriptor, this);
+
+    attempt(m_numRetries, [this, &textChunk, &chunkDescriptor]()
    {
-        chunk = it->second;
-    }
-    else
-    {
-        const auto& chunkDescriptor = m_indexer->GetIndex()[chunkId];
-        auto textChunk = make_shared<TextDataChunk>(chunkDescriptor, this);
-
-        attempt(m_numRetries, [this, &textChunk, &chunkDescriptor]()
+        if (ferror(m_file) != 0)
        {
-            LoadChunk(textChunk, chunkDescriptor);
-        });
-
-        if (m_chunkCacheSize > 0 && m_chunkCache.size() == m_chunkCacheSize)
-        {
-            size_t candidateId = SIZE_MAX;
-            size_t minNumSequencesLeft = SIZE_MAX;
-            for (const auto& it : m_chunkCache)
-            {
-                const auto& chunk = *(it.second.get());
-                size_t numSequencesUsed = 0;
-                numSequencesUsed += chunk.m_sequenceRequestCount;
-                size_t numSequencesLeft = chunk.m_sequenceMap.size() - numSequencesUsed;
-                if (numSequencesLeft < minNumSequencesLeft)
-                {
-                    minNumSequencesLeft = numSequencesLeft;
-                    candidateId = it.first;
-                }
-            }
-            assert(candidateId != SIZE_MAX);
-            m_chunkCache.erase(candidateId);
+            fclose(m_file);
+            m_file = fopenOrDie(m_filename, L"rbS");
        }
+        LoadChunk(textChunk, chunkDescriptor);
+    });

-        if (m_chunkCacheSize > 0)
-        {
-            m_chunkCache[chunkId] = textChunk;
-        }
-
-        chunk = textChunk;
-    }
-    return chunk;
+    return textChunk;
 }

 template <class ElemType>
@ -331,7 +297,7 @@ void TextParser<ElemType>::LoadChunk(TextChunkPtr& chunk, const ChunkDescriptor&
    {
        chunk->m_sequenceMap.insert(make_pair(
            sequenceDescriptor.m_id,
-            LoadSequence(!m_skipSequenceIds, sequenceDescriptor)));
+            LoadSequence(sequenceDescriptor)));
    }
 }

@ -390,7 +356,7 @@ void TextParser<ElemType>::SetFileOffset(int64_t offset)
 }

 template <class ElemType>
-typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence(bool verifyId, const SequenceDescriptor& sequenceDsc)
+typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence(const SequenceDescriptor& sequenceDsc)
 {
    auto fileOffset = sequenceDsc.m_fileOffsetBytes;

@ -403,17 +369,6 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
    m_pos = m_bufferStart + bufferOffset;
    size_t bytesToRead = sequenceDsc.m_byteSize;

-    if (verifyId)
-    {
-        size_t id;
-        if (!TryReadUint64(id, bytesToRead) || id != sequenceDsc.m_id)
-        {
-            PrintWarningNotification();
-            RuntimeError("Did not find the expected sequence (id = %" PRIu64 ") %ls.",
-                sequenceDsc.m_id, GetFileInfo().c_str());
-        }
-    }
-
    SequenceBuffer sequence;

    // TODO: reuse loaded sequences instead of creating new ones!
@ -444,8 +399,10 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
            {
                fprintf(stderr,
                    "WARNING: Could not read a row (# %" PRIu64 ")"
-                    " while loading sequence (id = %" PRIu64 ") %ls.\n",
-                    i + 1, sequenceDsc.m_id, GetFileInfo().c_str());
+                    " while loading sequence (id = %ls) %ls.\n",
+                    i + 1,
+                    GetSequenceKey(sequenceDsc).c_str(),
+                    GetFileInfo().c_str());
            }
        }

@ -455,9 +412,10 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
            {
                fprintf(stderr,
                    "WARNING: Exhausted all input"
-                    " expected for the current sequence (id = %" PRIu64 ") %ls,"
-                    " but only read %" PRId64 " out of %" PRId64 " expected rows.\n",
-                    sequenceDsc.m_id, GetFileInfo().c_str(), numRowsRead, expectedRowCount);
+                    " expected for the current sequence (id = %ls) %ls,"
+                    " but only read %" PRIu64 " out of %" PRIu64 " expected rows.\n",
+                    GetSequenceKey(sequenceDsc).c_str(),
+                    GetFileInfo().c_str(), numRowsRead, expectedRowCount);
            }
            break;
        }
@ -466,14 +424,14 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
    // Double check if there are empty input streams.
    // TODO this handling needs to be graceful, but currently CNTK complains when we return empty sequences.
    bool hasEmptyInputs = false, hasDuplicateInputs = false;
-
+    size_t maxInputLength = 0;
    for (size_t i = 0; i < sequence.size(); ++i)
    {
        if (sequence[i]->m_numberOfSamples == 0)
        {
            fprintf(stderr,
-                "ERROR: Input ('%ls') is empty in sequence (id = %" PRIu64 ") %ls.\n",
-                m_streams[i]->m_name.c_str(), sequenceDsc.m_id, GetFileInfo().c_str());
+                "ERROR: Input ('%ls') is empty in sequence (id = %ls) %ls.\n",
+                m_streams[i]->m_name.c_str(), GetSequenceKey(sequenceDsc).c_str(), GetFileInfo().c_str());
            hasEmptyInputs = true;
        }

@ -484,11 +442,12 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
            {
                fprintf(stderr,
                    "WARNING: Input ('%ls') contains more samples than expected"
-                    " (%" PRId64 " vs. %" PRId64 ") for sequence (id = %" PRIu64 ") %ls.\n",
+                    " (%" PRIu64 " vs. %" PRIu64 ") for sequence (id = %ls) %ls.\n",
                    m_streams[i]->m_name.c_str(), sequence[i]->m_numberOfSamples, expectedRowCount,
-                    sequenceDsc.m_id, GetFileInfo().c_str());
+                    GetSequenceKey(sequenceDsc).c_str(), GetFileInfo().c_str());
            }
        }
+        maxInputLength = max(sequence[i]->m_numberOfSamples, maxInputLength);
    }

    if (hasEmptyInputs)
@ -501,13 +460,25 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
    {
        IncrementNumberOfErrorsOrDie();
    }
+    else if (maxInputLength < expectedRowCount)
+    {
+        if (ShouldWarn())
+        {
+            fprintf(stderr,
+                "WARNING: Maximum per-input number of samples for sequence (id = %ls) %ls"
+                " is less than expected (%" PRIu64 " vs. %" PRIu64 ").\n",
+                GetSequenceKey(sequenceDsc).c_str(),
+                GetFileInfo().c_str(), maxInputLength, expectedRowCount);
+        }
+        IncrementNumberOfErrorsOrDie();
+    }

    if (m_traceLevel >= Info)
    {
        fprintf(stderr,
-            "INFO: Finished loading sequence (id = %" PRIu64 ") %ls,"
+            "INFO: Finished loading sequence (id = %ls) %ls,"
            " successfully read %" PRIu64 " out of expected %" PRIu64 " rows.\n",
-            sequenceDsc.m_id, GetFileInfo().c_str(), numRowsRead, expectedRowCount);
+            GetSequenceKey(sequenceDsc).c_str(), GetFileInfo().c_str(), numRowsRead, expectedRowCount);
    }

    return sequence;
@ -528,14 +499,6 @@ bool TextParser<ElemType>::TryReadRow(SequenceBuffer& sequence, size_t& bytesToR
    {
        char c = *m_pos;

-        if (c == COLUMN_DELIMITER || c == VALUE_DELIMITER || c == CARRIAGE_RETURN)
-        {
-            // skip column and value separators, as well as carriage returns.
-            ++m_pos;
-            --bytesToRead;
-            continue;
-        }
-
        if (c == ROW_DELIMITER)
        {
            // found the end of row, skip the delimiter, return.
@ -551,13 +514,21 @@ bool TextParser<ElemType>::TryReadRow(SequenceBuffer& sequence, size_t& bytesToR
            {
                fprintf(stderr,
                    "WARNING: Input row %ls contains more"
-                    " samples than expected (%" PRId64 " vs. %" PRId64 ").\n",
+                    " samples than expected (%" PRIu64 " vs. %" PRIu64 ").\n",
                    GetFileInfo().c_str(), numSampleRead, m_streams.size());
            }

            return numSampleRead > 0;
        }

+        if (isColumnDelimiter(c))
+        {
+            // skip column (input) delimiters.
+            ++m_pos;
+            --bytesToRead;
+            continue;
+        }
+
        if (TryReadSample(sequence, bytesToRead))
        {
            numSampleRead++;
@ -685,9 +656,9 @@ bool TextParser<ElemType>::TryGetInputId(size_t& id, size_t& bytesToRead)
    {
        char c = *m_pos;

-        // an input id can be followed by a value marker, end of line (also, carriage return),
-        // column separator or the name prefix of the following input.
-        if (c <= VALUE_DELIMITER || c == NAME_PREFIX)
+        // stop as soon as there's a value delimiter, an input prefix
+        // or a non-printable character (e.g., newline, carriage return).
+        if (isValueDelimiter(c) || c == NAME_PREFIX || isNonPrintable(c))
        {
            size_t size = scratchIndex - m_scratch.get();
            if (size)
@ -758,16 +729,24 @@ bool TextParser<ElemType>::TryReadDenseSample(vector<ElemType>& values, size_t s
    {
        char c = *m_pos;

+        if (isValueDelimiter(c))
+        {
+            // skip value delimiters
+            ++m_pos;
+            --bytesToRead;
+            continue;
+        }
+
        // return as soon as we hit a non-printable or a name prefix
-        if (c < VALUE_DELIMITER || c == NAME_PREFIX)
+        if (isNonPrintable(c) || c == NAME_PREFIX)
        {
            if (counter > sampleSize)
            {
                if (ShouldWarn())
                {
                    fprintf(stderr,
-                        "WARNING: Dense sample (size = %" PRId64 ") %ls"
-                        " exceeds the expected size (%" PRId64 ").\n",
+                        "WARNING: Dense sample (size = %" PRIu64 ") %ls"
+                        " exceeds the expected size (%" PRIu64 ").\n",
                        counter, GetFileInfo().c_str(), sampleSize);
                }
                return false;
@ -781,7 +760,7 @@ bool TextParser<ElemType>::TryReadDenseSample(vector<ElemType>& values, size_t s
                {
                    fprintf(stderr,
                        "WARNING: A dense sample %ls has a sparse suffix "
-                        "(expected size = %" PRId64 ", actual size = %" PRId64 ").\n",
+                        "(expected size = %" PRIu64 ", actual size = %" PRIu64 ").\n",
                        GetFileInfo().c_str(), sampleSize, counter);
                }
                for (; counter < sampleSize; ++counter)
@ -793,14 +772,6 @@ bool TextParser<ElemType>::TryReadDenseSample(vector<ElemType>& values, size_t s
            return true;
        }

-        if (c == VALUE_DELIMITER)
-        {
-            // skip value delimiters
-            ++m_pos;
-            --bytesToRead;
-            continue;
-        }
-
        if (!TryReadRealNumber(value, bytesToRead))
        {
            // bail out.
@ -832,14 +803,7 @@ bool TextParser<ElemType>::TryReadSparseSample(std::vector<ElemType>& values, st
    {
        char c = *m_pos;

-        // return as soon as we hit a non-printable or a name prefix
-        if (c < VALUE_DELIMITER || c == NAME_PREFIX)
-        {
-            // empty sparse samples are allowed ("|InputeName_1|InputName2...")
-            return true;
-        }
-
-        if (c == VALUE_DELIMITER)
+        if (isValueDelimiter(c))
        {
            // skip value delimiters
            ++m_pos;
@ -847,6 +811,13 @@ bool TextParser<ElemType>::TryReadSparseSample(std::vector<ElemType>& values, st
            continue;
        }

+        // return as soon as we hit a non-printable or a name prefix
+        if (isNonPrintable(c) || c == NAME_PREFIX)
+        {
+            // empty sparse samples are allowed ("|InputName_1|InputName_2...")
+            return true;
+        }
+
        // read next sparse index
        if (!TryReadUint64(index, bytesToRead))
        {
@ -876,7 +847,7 @@ bool TextParser<ElemType>::TryReadSparseSample(std::vector<ElemType>& values, st
                fprintf(stderr,
                    "WARNING: Unexpected character('%c')"
                    " in place of the index delimiter ('%c')"
-                    " after a sparse value index (%" PRId64 ") %ls.\n",
+                    " after a sparse value index (%" PRIu64 ") %ls.\n",
                    c, INDEX_DELIMITER, index, GetFileInfo().c_str());
            }
            return false;
@ -913,8 +884,8 @@ void TextParser<ElemType>::SkipToNextValue(size_t& bytesToRead)
    while (bytesToRead && CanRead())
    {
        char c = *m_pos;
-        // skip everything until we hit either a value marker, an input marker or the end of row.
-        if (c == VALUE_DELIMITER || c == ROW_DELIMITER || c == NAME_PREFIX)
+        // skip everything until we hit either a value delimiter, an input marker or the end of row.
+        if (isValueDelimiter(c) || c == NAME_PREFIX || c == ROW_DELIMITER)
        {
            return;
        }
@ -950,19 +921,7 @@ bool TextParser<ElemType>::TryReadUint64(size_t& value, size_t& bytesToRead)

        if (!isdigit(c))
        {
-            if (isDelimiter(c))
-            {
-                return found;
-            }
-            
-            if (ShouldWarn())
-            {
-                fprintf(stderr,
-                    "WARNING: Unexpected character('%c') in a uint64 value %ls.\n",
-                    c, GetFileInfo().c_str());
-            }
-
-            return false;
+            return found;
        }

        found |= true;
@ -1213,12 +1172,6 @@ void TextParser<ElemType>::SetSkipSequenceIds(bool skip)
    m_skipSequenceIds = skip;
 }

-template <class ElemType>
-void TextParser<ElemType>::SetChunkCacheSize(unsigned int size)
-{
-    m_chunkCacheSize = size;
-}
-
 template <class ElemType>
 void TextParser<ElemType>::SetChunkSize(size_t size)
 {
@ -1239,6 +1192,28 @@ std::wstring TextParser<ElemType>::GetFileInfo()
    return info.str();
 }

+static SequenceDescription s_InvalidSequence{0, 0, 0, false, {0, 0}};
+
+template <class ElemType>
+void TextParser<ElemType>::GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& result)
+{
+    const auto& keys = m_indexer->GetIndex().m_keyToSequenceInChunk;
+    auto sequenceLocation = keys.find(key.m_sequence);
+    if (sequenceLocation == keys.end())
+    {
+        result = s_InvalidSequence;
+        return;
+    }
+
+    result = m_indexer->GetIndex().m_chunks[sequenceLocation->second.first].m_sequences[sequenceLocation->second.second];
+}
+
+template <class ElemType>
+const wstring& TextParser<ElemType>::GetSequenceKey(const SequenceDescriptor& s) const
+{
+    return m_corpus->GetStringRegistry()[s.m_key.m_sequence];
+}
+
 template class TextParser<float>;
 template class TextParser<double>;
 }}}
--- a/Source/Readers/CNTKTextFormatReader/TextParser.h
+++ b/Source/Readers/CNTKTextFormatReader/TextParser.h
@ -9,6 +9,7 @@
 #include "Descriptors.h"
 #include "TextConfigHelper.h"
 #include "Indexer.h"
+#include "CorpusDescriptor.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -22,10 +23,9 @@ class TextParser : public DataDeserializerBase {
 public:
    explicit TextParser(const TextConfigHelper& helper);

-    ~TextParser();
+    TextParser(CorpusDescriptorPtr corpus, const TextConfigHelper& helper);

-    // Builds an index of the input data.
-    void Initialize();
+    ~TextParser();

    // Retrieves a chunk of data.
    ChunkPtr GetChunk(size_t chunkId) override;
@ -36,7 +36,12 @@ public:
    // Get information about particular chunk.
    void GetSequencesForChunk(size_t chunkId, std::vector<SequenceDescription>& result) override;

+    void GetSequenceDescriptionByKey(const KeyType&, SequenceDescription&) override;
+
 private:
+    // Builds an index of the input data.
+    void Initialize();
+
    // A buffer to keep data for all samples in a (variable length) sequence 
    // from a single input stream.
    struct InputStreamBuffer
@ -106,7 +111,6 @@ private:
    unique_ptr<char[]> m_scratch; // local buffer for string parsing

    size_t m_chunkSizeBytes;
-    unsigned int m_chunkCacheSize; // number of chunks to keep in the memory
    unsigned int m_traceLevel;
    bool m_hadWarnings;
    unsigned int m_numAllowedErrors;
@ -114,9 +118,8 @@ private:
    unsigned int m_numRetries; // specifies the number of times an unsuccessful 
    // file operation should be repeated (default value is 5).

-    // A map of currently loaded chunks
-    // TODO: remove caching once partial randomization is in master.
-    std::map<size_t, TextChunkPtr> m_chunkCache;
+    // Corpus descriptor.
+    CorpusDescriptorPtr m_corpus;

    // throws runtime exception when number of parsing errors is 
    // greater than the specified threshold
@ -166,12 +169,12 @@ private:
    bool inline ShouldWarn() { m_hadWarnings = true; return m_traceLevel >= Warning; }

    // Given a descriptor, retrieves the data for the corresponding sequence from the file.
-    SequenceBuffer LoadSequence(bool verifyId, const SequenceDescriptor& descriptor);
+    SequenceBuffer LoadSequence(const SequenceDescriptor& descriptor);

    // Given a descriptor, retrieves the data for the corresponding chunk from the file.
    void LoadChunk(TextChunkPtr& chunk, const ChunkDescriptor& descriptor);

-    TextParser(const std::wstring& filename, const vector<StreamDescriptor>& streams);
+    TextParser(CorpusDescriptorPtr corpus, const std::wstring& filename, const vector<StreamDescriptor>& streams);

    void SetTraceLevel(unsigned int traceLevel);

@ -181,12 +184,12 @@ private:

    void SetChunkSize(size_t size);

-    void SetChunkCacheSize(unsigned int size);
-
    void SetNumRetries(unsigned int numRetries);

    friend class CNTKTextFormatReaderTestRunner<ElemType>;

+    const std::wstring& GetSequenceKey(const SequenceDescriptor& s) const;
+
    DISABLE_COPY_AND_MOVE(TextParser);
 };
 }}}
--- a/Source/Readers/CNTKTextFormatReader/TextReaderConstants.h
+++ b/Source/Readers/CNTKTextFormatReader/TextReaderConstants.h
@ -7,13 +7,37 @@

 namespace Microsoft { namespace MSR { namespace CNTK {

-    const char COLUMN_DELIMITER = '\t';
+    const char SPACE_CHAR = ' ';
+    const char TAB_CHAR = '\t';
+
    const char NAME_PREFIX = '|';
-    const char VALUE_DELIMITER = ' ';
+    
    const char INDEX_DELIMITER = ':';
+
    const char ROW_DELIMITER = '\n';
-    const char CARRIAGE_RETURN = '\r';
+    
    const char ESCAPE_SYMBOL = '#';

    const auto BUFFER_SIZE = 256 * 1024;
+
+    inline bool isPrintable(char c)
+    {
+        return c >= SPACE_CHAR;
+    }
+
+    inline bool isNonPrintable(char c)
+    {
+        return !isPrintable(c);
+    }
+
+    inline bool isValueDelimiter(char c)
+    {
+        return c == SPACE_CHAR || c == TAB_CHAR;
+    }
+
+    inline bool isColumnDelimiter(char c)
+    {
+        return isValueDelimiter(c) || (isNonPrintable(c) && c != ROW_DELIMITER);
+    }
+
 }}}
--- a/Source/Readers/CompositeDataReader/CompositeDataReader.cpp
+++ b/Source/Readers/CompositeDataReader/CompositeDataReader.cpp
@ -2,7 +2,7 @@
 // Copyright (c) Microsoft. All rights reserved.
 // Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
 //
-// CompositeDataReader.cpp : Defines a reader that allows composing different deserializers.
+// CompositeReader.cpp : Defines a reader that allows composing different deserializers.
 // With this reader in place the users should only extend deserializers.
 //

@ -11,8 +11,6 @@
 #define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms  --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
 #endif

-#define DATAREADER_EXPORTS // creating the exports here
-
 #include "CompositeDataReader.h"
 #include "Bundler.h"
 #include "BlockRandomizer.h"
@ -20,17 +18,21 @@
 #include "FramePacker.h"
 #include "SequencePacker.h"
 #include "TruncatedBpttPacker.h"
-#include "HeapMemoryProvider.h"
 #include "CorpusDescriptor.h"
+#include "ConfigUtil.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

+// The whole CompositeDataReader is meant as a stopgap to allow deserializers/transformers composition until SGD talks
+// directly to the new Reader API. 
+// For more information please see its header file.
+// This method composes together packers + randomizer + a set of transformers and deserializers.
 CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryProviderPtr provider) : m_layout(make_shared<MBLayout>()),
    m_corpus(std::make_shared<CorpusDescriptor>()),
    m_provider(provider)
 {
    // Identifying packing mode.
-    bool frameMode = config(L"frameMode", true);
+    bool frameMode = config(L"frameMode", false);
    bool truncated = config(L"truncated", false);
    if (frameMode && truncated)
    {
@ -55,37 +57,56 @@ CompositeDataReader::CompositeDataReader(const ConfigParameters& config, MemoryP
        m_packingMode = PackingMode::sequence;
    }

-    // Whether we need to check data between different deserializers.
-    bool cleanse = config(L"checkData", false);
+    m_precision = config("precision", "float");

    // Creating deserializers.
    // TODO: Currently the primary deserializer defines the corpus. The logic will be moved to CorpusDescriptor class.
    CreateDeserializers(config);

-    // Bundling deserializers together.
-    // TODO: Add transformers in between.
-    auto bundler = std::make_shared<Bundler>(config, m_deserializers[0], m_deserializers, cleanse);
+    if (m_deserializers.empty())
+    {
+        InvalidArgument("Could not find deserializers in the reader config.");
+    }
+
+    IDataDeserializerPtr deserializer = m_deserializers.front();
+    if (m_deserializers.size() > 1)
+    {
+        // Bundling deserializers together.
+        // Option whether we need to check data between different deserializers.
+        bool cleanse = config(L"checkData", true);
+        deserializer = std::make_shared<Bundler>(config, deserializer, m_deserializers, cleanse);
+    }

    int verbosity = config(L"verbosity", 2);

    // Pick up the randomizer.
    bool randomize = config(L"randomize", false);
+    // By default do not use omp threads for deserialization of sequences.
+    // It makes sense to put it to true for cases when deserialization is CPU intensive,
+    // i.e. decompression of images.
+    bool multiThreadedDeserialization = config(L"multiThreadedDeserialization", false);
    if (randomize)
    {
        // By default randomizing the whole data set.
        size_t randomizationWindow = config(L"randomizationWindow", requestDataSize);
-        m_randomizer = std::make_shared<BlockRandomizer>(verbosity, randomizationWindow, bundler, BlockRandomizer::DecimationMode::chunk, true);
+        // By default using STL random number generator.
+        bool useLegacyRandomization = config(L"useLegacyRandomization", false);
+        m_sequenceEnumerator = std::make_shared<BlockRandomizer>(verbosity, randomizationWindow, deserializer, BlockRandomizer::DecimationMode::chunk, useLegacyRandomization, multiThreadedDeserialization);
    }
    else
    {
-        m_randomizer = std::make_shared<NoRandomizer>(bundler);
+        m_sequenceEnumerator = std::make_shared<NoRandomizer>(deserializer, multiThreadedDeserialization);
    }

-    m_randomizer->Initialize(nullptr, config);
+    // In case when there are transforms, applying them to the data.
+    m_sequenceEnumerator = m_transforms.empty()
+        ? m_sequenceEnumerator 
+        : std::make_shared<TransformController>(m_transforms, m_sequenceEnumerator);

    // Create output stream descriptions - where to get those? from config? what if it is not the same as network expects?
-    // TODO: Currently only sparse streams.
-    for (const auto& streamDescription : bundler->GetStreamDescriptions())
+    // TODO: Currently only dense output streams.
+    // TODO: Check here. We should already support repacking sparse into dense in the shim/matrix.
+    for (const auto& streamDescription : m_sequenceEnumerator->GetStreamDescriptions())
    {
        StreamDescriptionPtr stream = std::make_shared<StreamDescription>(*streamDescription);
        stream->m_storageType = StorageType::dense;
@ -104,13 +125,17 @@ Minibatch CompositeDataReader::ReadMinibatch()
    return m_packer->ReadMinibatch();
 }

+// Create deserializers based on the specified configuration. 
+// deserializers = [
+//        [ type = "ImageDataDeserializer" module = "ImageReader" ...]
+//        [ type = "CNTKTextFormatDeserializer" module = "CNTKTextFormatReader" ...]
 void CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConfig)
 {
    argvector<ConfigValue> deserializerConfigs =
        readerConfig(L"deserializers", ConfigParameters::Array(argvector<ConfigValue>(vector<ConfigValue> {})));

    assert(m_deserializers.empty());
-    bool primary = true;  // CUrrently, the first deserializer becomes primary - it drives chunking.
+    bool primary = true;  // Currently, the first deserializer becomes primary - it drives chunking.
    for (size_t i = 0; i < deserializerConfigs.size(); ++i)
    {
        // TODO: Should go away in the future. Framing can be done on top of deserializers.
@ -124,6 +149,8 @@ void CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConf
    }
 }

+// Creates a particular deserializer based on the config: it loads the external module and calls CreateDeserializer
+// factory function for a particular deserializer type.
 IDataDeserializerPtr CompositeDataReader::CreateDeserializer(const ConfigParameters& deserializerConfig, bool primary)
 {
    typedef bool(*CreateDeserializerFactory) (IDataDeserializer** d, const std::wstring& type, const ConfigParameters& cfg, CorpusDescriptorPtr corpus, bool primary);
@ -138,10 +165,77 @@ IDataDeserializerPtr CompositeDataReader::CreateDeserializer(const ConfigParamet
        RuntimeError("Cannot create deserializer. Please check module and type in the configuration.");
    }

+    // Create transformers if necessary.
+    CreateTransforms(deserializerConfig);
+
    assert(d != nullptr);
    return IDataDeserializerPtr(d);
 }

+// Create transformers based on the configuration, i.e.
+// deserializers = [
+//     [
+//         type = "ImageDataDeserializer"
+//         module = "ImageReader"
+//         input = [
+//               features = [
+//---->              transforms = [
+//                       [type = "Crop"]:[type = "Scale"]...
+
+void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerConfig)
+{
+    std::string defaultModule = deserializerConfig("module");
+    argvector<ConfigParameters> inputs = deserializerConfig("input");
+    for (size_t i = 0; i < inputs.size(); ++i)
+    {
+        // Trying to find transformers in a stream section of the config.
+        auto inputSections = TryGetSectionsWithParameter(inputs[i], "transforms");
+        if (inputSections.size() > 1)
+        {
+            LogicError("Only a single 'transforms' config is allowed per stream.");
+        }
+
+        // No need to create anything for this stream, skipping.
+        if (inputSections.empty())
+        {
+            continue;
+        }
+
+        ConfigParameters input = inputs[i](inputSections.front());
+        std::wstring inputName = msra::strfun::utf16(input.ConfigName());
+
+        // Read transformers in order, appending them to the transformer pipeline.
+        argvector<ConfigParameters> transforms = input("transforms");
+        for (size_t j = 0; j < transforms.size(); ++j)
+        {
+            TransformerPtr transformer = CreateTransformer(transforms[j], defaultModule);
+            m_transforms.push_back(Transformation{transformer, inputName});
+        }
+    }
+
+}
+
+// Create a transformer for a particular configuration. Loading it from the module of the deserializer if module is not specified, i.e.
+//     transforms = [
+//         [type = "Scale" width=...]:...
+TransformerPtr CompositeDataReader::CreateTransformer(const ConfigParameters& config, const string& defaultModule)
+{
+    typedef bool(*TransformerFactory) (Transformer** t, const std::wstring& type, const ConfigParameters& cfg);
+
+    std::string transformerModule = config("module", defaultModule.c_str());
+    TransformerFactory f = (TransformerFactory)Plugin::Load(transformerModule, "CreateTransformer");
+
+    std::wstring transformerType = config("type");
+    Transformer* t;
+    if (!f(&t, transformerType, config))
+    {
+        RuntimeError("Cannot create transformer. Please check the module and type in the configuration.");
+    }
+
+    assert(t != nullptr);
+    return TransformerPtr(t);
+}
+
 void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg)
 {
    EpochConfiguration config = cfg;
@ -151,7 +245,7 @@ void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg)
        RuntimeError("Unsupported minibatch size '%d'.", (int)config.m_totalEpochSizeInSamples);
    }

-    m_randomizer->StartEpoch(config);
+    m_sequenceEnumerator->StartEpoch(config);

    // TODO: As the next step the packers should be moved into the network.
    switch (m_packingMode)
@ -159,13 +253,13 @@ void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg)
    case PackingMode::sample:
        m_packer = std::make_shared<FramePacker>(
            m_provider,
-            m_randomizer,
+            m_sequenceEnumerator,
            m_streams);
        break;
    case PackingMode::sequence:
        m_packer = std::make_shared<SequencePacker>(
            m_provider,
-            m_randomizer,
+            m_sequenceEnumerator,
            m_streams);
        break;
    case PackingMode::truncated:
@ -173,7 +267,7 @@ void CompositeDataReader::StartEpoch(const EpochConfiguration& cfg)
        config.m_truncationSize = m_truncationLength;
        m_packer = std::make_shared<TruncatedBPTTPacker>(
            m_provider,
-            m_randomizer,
+            m_sequenceEnumerator,
            m_streams);
        break;
    }
--- a/Source/Readers/CompositeDataReader/CompositeDataReader.h
+++ b/Source/Readers/CompositeDataReader/CompositeDataReader.h
@ -9,7 +9,9 @@
 #include <string>
 #include <future>
 #include "DataReader.h"
-#include <Reader.h>
+#include "Reader.h"
+#include "Transformer.h"
+#include "TransformController.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -44,13 +46,11 @@ struct Minibatch;
 //     - sequences can be transformed by the transformers applied on top of deserializer (TODO: not yet in place)
 //     - deserializers are bound together using the bundler - it bundles sequences with the same sequence id retrieved from different deserializers
 //     - packer is used to pack randomized sequences into the minibatch
-// The composite data reader is currently also responsible for asynchronous prefetching of the minibatch data.
+// The composite reader is currently also responsible for asynchronous prefetching of the minibatch data.

 // In order not to break existing configs and allow deserializers composition it exposes the same interface as the old readers, but it is not exposed
 // to external developers. The actual "reader developer" now has to provide deserializer(s) only.
 // TODO: Implement proper corpus descriptor.
-// TODO: Add transformers as the next step.
-// TODO: Same code as in ReaderLib shim, the one in the ReaderLib will be deleted as the next step.
 // TODO: Change this interface when SGD is changed.
 class CompositeDataReader : public Reader, protected Plugin
 {
@ -68,7 +68,10 @@ public:

 private:
    void CreateDeserializers(const ConfigParameters& readerConfig);
+    void CreateTransforms(const ConfigParameters& deserializerConfig);
+
    IDataDeserializerPtr CreateDeserializer(const ConfigParameters& readerConfig, bool primary);
+    TransformerPtr CreateTransformer(const ConfigParameters& config, const std::string& defaultModule);


    enum class PackingMode
@ -103,9 +106,11 @@ private:
    // A list of deserializers.
    std::vector<IDataDeserializerPtr> m_deserializers;

-    // Randomizer.
-    // TODO: remove Transformer interface from randomizer.
-    TransformerPtr m_randomizer;
+    // A list of transformers.
+    std::vector<Transformation> m_transforms;
+
+    // Sequence provider.
+    SequenceEnumeratorPtr m_sequenceEnumerator;

    // TODO: Should be removed. We already have matrices on this level.
    // Should just get the corresponding pinned memory.
--- a/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj.filters
+++ b/Source/Readers/CompositeDataReader/CompositeDataReader.vcxproj.filters
@ -3,8 +3,8 @@
  <ItemGroup>
    <ClCompile Include="dllmain.cpp" />
    <ClCompile Include="stdafx.cpp" />
-    <ClCompile Include="CompositeDataReader.cpp" />
    <ClCompile Include="Exports.cpp" />
+    <ClCompile Include="CompositeDataReader.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="stdafx.h" />
--- a/Source/Readers/ExperimentalHTKMLFReader/ConfigHelper.cpp
+++ b/Source/Readers/ExperimentalHTKMLFReader/ConfigHelper.cpp
@ -198,8 +198,16 @@ size_t ConfigHelper::GetRandomizationWindow()

 wstring ConfigHelper::GetRandomizer()
 {
-    // get the read method, defaults to "blockRandomize"
-    wstring randomizer(m_config(L"readMethod", L"blockRandomize"));
+    // Check (on the action) if we're writing (inputs only) or training/evaluating (inputs and outputs)
+    bool isActionWrite = wstring(m_config(L"action", L"")) == L"write";
+
+    // Get the read method, defaults to "blockRandomize".
+    wstring randomizer = m_config(L"readMethod", L"blockRandomize");
+
+    if (isActionWrite && randomizer != L"none")
+    {
+        InvalidArgument("'readMethod' must be 'none' for write action.");
+    }

    if (randomizer == L"blockRandomize" && GetRandomizationWindow() == randomizeNone)
    {
@ -231,7 +239,7 @@ vector<wstring> ConfigHelper::GetSequencePaths()
    // post processing file list :
    //  - if users specified PrefixPath, add the prefix to each of path in filelist
    //  - else do the dotdotdot expansion if necessary
-    if (!rootPath.empty()) // use has specified a path prefix for this  feature
+    if (!rootPath.empty()) // user has specified a path prefix for this feature
    {
        // first make slash consistent (sorry for Linux users:this is not necessary for you)
        replace(rootPath.begin(), rootPath.end(), L'\\', L'/');
--- a/Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.cpp
+++ b/Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.cpp
@ -36,7 +36,7 @@ HTKDataDeserializer::HTKDataDeserializer(
    // TODO: This should be read in one place, potentially given by SGD.
    m_frameMode = (ConfigValue)cfg("frameMode", "true");

-    argvector<ConfigValue> inputs = cfg("inputs");
+    argvector<ConfigValue> inputs = cfg("input");
    if (inputs.size() != 1)
    {
        InvalidArgument("HTKDataDeserializer supports a single input stream only.");
@ -127,22 +127,12 @@ void HTKDataDeserializer::InitializeChunkDescriptions(ConfigHelper& config)
        }

        wstring key = description.GetKey();
-        size_t id = 0;
-        if (m_primary)
+        if (!m_corpus->IsIncluded(key))
        {
-            // TODO: Definition of the corpus should be moved to the CorpusDescriptor
-            // TODO: All keys should be added there. Currently, we add them in the driving deserializer.
-            id = stringRegistry.AddValue(key);
-        }
-        else
-        {
-            if (!stringRegistry.TryGet(key, id))
-            {
-                // Utterance is unknown, skipping it.
-                continue;
-            }
+            continue;
        }

+        size_t id = stringRegistry[key];
        description.SetId(id);
        utterances.push_back(description);
        m_totalNumberOfFrames += numberOfFrames;
--- a/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp
+++ b/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.cpp
@ -27,9 +27,9 @@ std::vector<IDataDeserializerPtr> CreateDeserializers(const ConfigParameters& re
    ConfigHelper config(readerConfig);

    config.GetDataNamesFromConfig(featureNames, labelNames, notused, notused);
-    if (featureNames.size() < 1 || labelNames.size() < 1)
+    if (featureNames.size() < 1)
    {
-        InvalidArgument("Network needs at least 1 feature and 1 label specified.");
+        InvalidArgument("Network needs at least 1 feature specified.");
    }

    CorpusDescriptorPtr corpus = std::make_shared<CorpusDescriptor>();
@ -105,7 +105,8 @@ HTKMLFReader::HTKMLFReader(MemoryProviderPtr provider,
        LogicError("Please specify at least a single input stream.");
    }

-    auto bundler = std::make_shared<Bundler>(readerConfig, deserializers[0], deserializers, false);
+    bool cleanse = readerConfig(L"checkData", false);
+    auto bundler = std::make_shared<Bundler>(readerConfig, deserializers[0], deserializers, cleanse);
    int verbosity = readerConfig(L"verbosity", 2);
    std::wstring readMethod = config.GetRandomizer();

@ -123,8 +124,6 @@ HTKMLFReader::HTKMLFReader(MemoryProviderPtr provider,
        RuntimeError("readMethod must be 'blockRandomize' or 'none'.");
    }

-    m_randomizer->Initialize(nullptr, readerConfig);
-
    // Create output stream descriptions (all dense)
    for (auto d : deserializers)
    {
--- a/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.h
+++ b/Source/Readers/ExperimentalHTKMLFReader/HTKMLFReader.h
@ -8,6 +8,7 @@
 #include "Reader.h"
 #include "Packer.h"
 #include "Config.h"
+#include "SequenceEnumerator.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -54,8 +55,7 @@ private:
    // Memory provider (TODO: this will possibly change in the near future.)
    MemoryProviderPtr m_provider;

-    // TODO: Randomizer won't implement transformer interface in the near future.
-    TransformerPtr m_randomizer;
+    SequenceEnumeratorPtr m_randomizer;

    // Truncation length for BPTT mode.
    size_t m_truncationLength;
--- a/Source/Readers/ExperimentalHTKMLFReader/MLFDataDeserializer.cpp
+++ b/Source/Readers/ExperimentalHTKMLFReader/MLFDataDeserializer.cpp
@ -54,7 +54,7 @@ MLFDataDeserializer::MLFDataDeserializer(CorpusDescriptorPtr corpus, const Confi
        LogicError("Mlf deserializer does not support primary mode - it cannot control chunking.");
    }

-    argvector<ConfigValue> inputs = cfg("inputs");
+    argvector<ConfigValue> inputs = cfg("input");
    if (inputs.size() != 1)
    {
        LogicError("MLFDataDeserializer supports a single input stream only.");
@ -124,7 +124,7 @@ void MLFDataDeserializer::InitializeChunkDescriptions(CorpusDescriptorPtr corpus
    description.m_isValid = true;
    size_t totalFrames = 0;

-    auto& stringRegistry = corpus->GetStringRegistry();
+    const auto& stringRegistry = corpus->GetStringRegistry();

    // TODO resize m_keyToSequence with number of IDs from string registry

--- a/Source/Readers/HTKMLFReader/HTKMLFReader.cpp
+++ b/Source/Readers/HTKMLFReader/HTKMLFReader.cpp
@ -599,7 +599,7 @@ void HTKMLFReader<ElemType>::PrepareForTrainingOrTesting(const ConfigRecordType&
 }

 // Load all input and output data.
-// Note that the terms features imply be real-valued quanities and
+// Note that the terms features imply be real-valued quantities and
 // labels imply categorical quantities, irrespective of whether they
 // are inputs or targets for the network
 // TODO: lots of code dup with the other Prepare function
--- a/Source/Readers/HTKMLFReader/htkfeatio.h
+++ b/Source/Readers/HTKMLFReader/htkfeatio.h
@ -298,11 +298,9 @@ public:
 #else
        W.close(numframes);
 #endif
-#ifdef _WIN32 // BUGBUG: and on Linux??
        // rename to final destination
        // (This would only fail in strange circumstances such as accidental multiple processes writing to the same file.)
        renameOrDie(tmppath, path);
-#endif
    }
 };

@ -417,7 +415,8 @@ public:
                    if (xpath.empty())
                        malformed(pathParam);
                    e = msra::strfun::toint(consume(xpath, L"]"));
-                    if (!xpath.empty())
+                    // TODO \r should be handled elsewhere; refine this
+                    if (!xpath.empty() && xpath != L"\r")
                        malformed(pathParam);
                    isarchive = true;
                }
--- a/Source/Readers/ImageReader/Exports.cpp
+++ b/Source/Readers/ImageReader/Exports.cpp
@ -11,7 +11,9 @@
 #include "ReaderShim.h"
 #include "ImageReader.h"
 #include "HeapMemoryProvider.h"
-#include "CudaMemoryProvider.h"
+#include "ImageDataDeserializer.h"
+#include "ImageTransformers.h"
+#include "CorpusDescriptor.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -32,4 +34,41 @@ extern "C" DATAREADER_API void GetReaderD(IDataReader** preader)
    *preader = new ReaderShim<double>(factory);
 }

+// TODO: Not safe from the ABI perspective. Will be uglified to make the interface ABI.
+// Factory entry point for the deserializers provided by the image module.
+// When 'type' is recognized, stores a newly allocated deserializer in '*deserializer' and returns true;
+// otherwise returns false and leaves '*deserializer' untouched. The trailing unnamed bool is not used.
+extern "C" DATAREADER_API bool CreateDeserializer(IDataDeserializer** deserializer, const std::wstring& type, const ConfigParameters& deserializerConfig, CorpusDescriptorPtr corpus, bool)
+{
+    // Only one deserializer type lives in this module; reject everything else up front.
+    if (type != L"ImageDataDeserializer")
+        return false;
+
+    *deserializer = new ImageDataDeserializer(corpus, deserializerConfig);
+    return true;
+}
+
+// Factory entry point for the image transformers provided by this module.
+// 'type' is matched exactly against Crop, Scale, Color, Intensity, Mean and Transpose.
+// On a match a newly allocated transformer is stored in '*transformer' and true is returned;
+// for any other type the function returns false and '*transformer' is left untouched.
+extern "C" DATAREADER_API bool CreateTransformer(Transformer** transformer, const std::wstring& type, const ConfigParameters& config)
+{
+    Transformer* created = nullptr;
+
+    if (type == L"Crop")
+        created = new CropTransformer(config);
+    else if (type == L"Scale")
+        created = new ScaleTransformer(config);
+    else if (type == L"Color")
+        created = new ColorTransformer(config);
+    else if (type == L"Intensity")
+        created = new IntensityTransformer(config);
+    else if (type == L"Mean")
+        created = new MeanTransformer(config);
+    else if (type == L"Transpose")
+        created = new TransposeTransformer(config);
+
+    // Nothing matched: unknown type.
+    if (created == nullptr)
+        return false;
+
+    // Transformer created.
+    *transformer = created;
+    return true;
+}
+
 }}}
--- a/Source/Readers/ImageReader/ImageConfigHelper.cpp
+++ b/Source/Readers/ImageReader/ImageConfigHelper.cpp
@ -6,33 +6,15 @@
 #include "stdafx.h"
 #include "ImageConfigHelper.h"
 #include "StringUtil.h"
+#include "ConfigUtil.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

-std::vector<std::string> GetSectionsWithParameter(const ConfigParameters& config, const std::string& parameterName)
-{
-    std::vector<std::string> sectionNames;
-    for (const std::pair<std::string, ConfigParameters>& section : config)
-    {
-        if (section.second.ExistsCurrent(parameterName))
-        {
-            sectionNames.push_back(section.first);
-        }
-    }
-
-    if (sectionNames.empty())
-    {
-        RuntimeError("ImageReader requires %s parameter.", parameterName.c_str());
-    }
-
-    return sectionNames;
-}
-
 ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config)
    : m_dataFormat(CHW)
 {
-    std::vector<std::string> featureNames = GetSectionsWithParameter(config, "width");
-    std::vector<std::string> labelNames = GetSectionsWithParameter(config, "labelDim");
+    std::vector<std::string> featureNames = GetSectionsWithParameter("ImageReader", config, "width");
+    std::vector<std::string> labelNames = GetSectionsWithParameter("ImageReader", config, "labelDim");

    // REVIEW alexeyk: currently support only one feature and label section.
    if (featureNames.size() != 1 || labelNames.size() != 1)
@ -77,7 +59,7 @@ ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config)

    m_mapPath = config(L"file");

-    m_grayscale = config(L"grayscale", false);
+    m_grayscale = config(L"grayscale", c == 1);
    std::string rand = config(L"randomize", "auto");

    if (AreEqualIgnoreCase(rand, "auto"))
@ -112,7 +94,7 @@ ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config)

    m_cpuThreadCount = config(L"numCPUThreads", 0);

-    m_multiViewCrop = AreEqualIgnoreCase((string)featureSection(L"cropType", ""), "multiview10");
+    m_cropType = ParseCropType(featureSection(L"cropType", ""));
 }

 std::vector<StreamDescriptionPtr> ImageConfigHelper::GetStreams() const
@ -136,4 +118,25 @@ std::string ImageConfigHelper::GetMapPath() const
 {
    return m_mapPath;
 }
+
+// Translates the textual 'cropType' setting into a CropType value.
+// Matching ignores case; an empty string selects center cropping.
+// Any other value is reported through RuntimeError.
+CropType ImageConfigHelper::ParseCropType(const std::string &src)
+{
+    const bool wantsCenter = src.empty() || AreEqualIgnoreCase(src, "center");
+    if (wantsCenter)
+        return CropType::Center;
+    else if (AreEqualIgnoreCase(src, "random"))
+        return CropType::Random;
+    else if (AreEqualIgnoreCase(src, "multiview10"))
+        return CropType::MultiView10;
+
+    RuntimeError("Invalid crop type: %s.", src.c_str());
+}
+
 }}}
--- a/Source/Readers/ImageReader/ImageConfigHelper.h
+++ b/Source/Readers/ImageReader/ImageConfigHelper.h
@ -12,6 +12,13 @@

 namespace Microsoft { namespace MSR { namespace CNTK {

+// Cropping modes understood by the image reader; produced from the 'cropType' setting
+// by ImageConfigHelper::ParseCropType (matching there ignores case).
+enum class CropType
+{
+    Center = 0,     // "center", or no cropType given
+    Random = 1,     // "random"
+    MultiView10 = 2 // "multiview10"
+};
+
 // A helper class for image specific parameters.
 // A simple wrapper around CNTK ConfigParameters.
 class ImageConfigHelper
@ -50,12 +57,19 @@ public:
    {
        return m_grayscale;
    }
-	
-	bool IsMultiViewCrop() const
+
+    CropType GetCropType() const
    {
-        return m_multiViewCrop;
+        return m_cropType;
    }

+    bool IsMultiViewCrop() const
+    {
+        return m_cropType == CropType::MultiView10;
+    }
+
+    static CropType ParseCropType(const std::string &src);
+
 private:
    ImageConfigHelper(const ImageConfigHelper&) = delete;
    ImageConfigHelper& operator=(const ImageConfigHelper&) = delete;
@ -65,8 +79,8 @@ private:
    ImageLayoutKind m_dataFormat;
    int m_cpuThreadCount;
    bool m_randomize;
-    bool m_multiViewCrop;
    bool m_grayscale;
+    CropType m_cropType;
 };

 typedef std::shared_ptr<ImageConfigHelper> ImageConfigHelperPtr;
--- a/Source/Readers/ImageReader/ImageDataDeserializer.cpp
+++ b/Source/Readers/ImageReader/ImageDataDeserializer.cpp
@ -11,6 +11,8 @@
 #include <limits>
 #include "ImageDataDeserializer.h"
 #include "ImageConfigHelper.h"
+#include "StringUtil.h"
+#include "ConfigUtil.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -115,13 +117,66 @@ public:
    }
 };

+// A new constructor to support new compositional configuration,
+// that allows composition of deserializers and transforms on inputs.
+// Within 'input' it expects exactly one section carrying 'transforms' (the feature stream, id 0)
+// and exactly one carrying 'labelDim' (the label stream, id 1); anything else is a RuntimeError.
+// Sequence descriptions are then built from the map file named by 'file', restricted to the given corpus.
+ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr corpus, const ConfigParameters& config)
+{
+    ConfigParameters inputs = config("input");
+    std::vector<std::string> featureNames = GetSectionsWithParameter("ImageDataDeserializer", inputs, "transforms");
+    std::vector<std::string> labelNames = GetSectionsWithParameter("ImageDataDeserializer", inputs, "labelDim");
+
+    // TODO: currently support only one feature and label section.
+    if (featureNames.size() != 1 || labelNames.size() != 1)
+    {
+        RuntimeError(
+            "ImageReader currently supports a single feature and label stream. '%d' features , '%d' labels found.",
+            static_cast<int>(featureNames.size()),
+            static_cast<int>(labelNames.size()));
+    }
+
+    // Anything other than "float" (compared ignoring case) selects double precision.
+    string precision = (ConfigValue)config("precision", "float");
+    const ElementType elementType = AreEqualIgnoreCase(precision, "float") ? ElementType::tfloat : ElementType::tdouble;
+
+    // This constructor has no member initializer list, so the member has to be assigned here;
+    // otherwise it would be left indeterminate for objects created through the compositional config.
+    m_featureElementType = elementType;
+
+    // Feature stream.
+    ConfigParameters featureSection = inputs(featureNames[0]);
+    auto features = std::make_shared<StreamDescription>();
+    features->m_id = 0;
+    features->m_name = msra::strfun::utf16(featureSection.ConfigName());
+    features->m_storageType = StorageType::dense;
+    features->m_elementType = elementType;
+    m_streams.push_back(features);
+
+    // Label stream.
+    ConfigParameters label = inputs(labelNames[0]);
+    size_t labelDimension = label("labelDim");
+    auto labels = std::make_shared<StreamDescription>();
+    labels->m_id = 1;
+    labels->m_name = msra::strfun::utf16(label.ConfigName());
+    labels->m_sampleLayout = std::make_shared<TensorShape>(labelDimension);
+    labels->m_storageType = StorageType::sparse_csc;
+    labels->m_elementType = elementType;
+    m_streams.push_back(labels);
+
+    // The label generator is typed after the label stream's element type.
+    m_labelGenerator = labels->m_elementType == ElementType::tfloat ?
+        (LabelGeneratorPtr)std::make_shared<TypedLabelGenerator<float>>(labelDimension) :
+        std::make_shared<TypedLabelGenerator<double>>(labelDimension);
+
+    m_grayscale = config(L"grayscale", false);
+
+    // TODO: multiview should be done on the level of randomizer/transformers - it is responsibility of the
+    // TODO: randomizer to collect how many copies each transform needs and request same sequence several times.
+    bool multiViewCrop = config(L"multiViewCrop", false);
+    CreateSequenceDescriptions(corpus, config(L"file"), labelDimension, multiViewCrop);
+}
+
+// TODO: Should be removed at some point.
+// Supports old type of ImageReader configuration.
 ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)
 {
    ImageConfigHelper configHelper(config);
    m_streams = configHelper.GetStreams();
    assert(m_streams.size() == 2);
    m_grayscale = configHelper.UseGrayscale();
-	const auto& label = m_streams[configHelper.GetLabelStreamId()];
+    const auto& label = m_streams[configHelper.GetLabelStreamId()];
    const auto& feature = m_streams[configHelper.GetFeatureStreamId()];

    // Expect data in HWC.
@ -147,7 +202,7 @@ ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)
        RuntimeError("Unsupported label element type '%d'.", (int)label->m_elementType);
    }

-    CreateSequenceDescriptions(configHelper.GetMapPath(), labelDimension, configHelper);
+    CreateSequenceDescriptions(std::make_shared<CorpusDescriptor>(), configHelper.GetMapPath(), labelDimension, configHelper.IsMultiViewCrop());
 }

 // Descriptions of chunks exposed by the image reader.
@ -173,7 +228,7 @@ void ImageDataDeserializer::GetSequencesForChunk(size_t chunkId, std::vector<Seq
    result.push_back(m_imageSequences[chunkId]);
 }

-void ImageDataDeserializer::CreateSequenceDescriptions(std::string mapPath, size_t labelDimension, const ImageConfigHelper& config)
+void ImageDataDeserializer::CreateSequenceDescriptions(CorpusDescriptorPtr corpus, std::string mapPath, size_t labelDimension, bool isMultiCrop)
 {
    std::ifstream mapFile(mapPath);
    if (!mapFile)
@ -181,7 +236,7 @@ void ImageDataDeserializer::CreateSequenceDescriptions(std::string mapPath, size
        RuntimeError("Could not open %s for reading.", mapPath.c_str());
    }

-    size_t itemsPerLine = config.IsMultiViewCrop() ? 10 : 1;
+    size_t itemsPerLine = isMultiCrop ? 10 : 1;
    size_t curId = 0;
    std::string line;
    PathReaderMap knownReaders;
@ -189,13 +244,30 @@ void ImageDataDeserializer::CreateSequenceDescriptions(std::string mapPath, size
    description.m_numberOfSamples = 1;
    description.m_isValid = true;

+    auto& stringRegistry = corpus->GetStringRegistry();
    for (size_t lineIndex = 0; std::getline(mapFile, line); ++lineIndex)
    {
        std::stringstream ss(line);
-        std::string imagePath;
-        std::string classId;
-        if (!std::getline(ss, imagePath, '\t') || !std::getline(ss, classId, '\t'))
-            RuntimeError("Invalid map file format, must contain 2 tab-delimited columns, line %" PRIu64 " in file %s.", lineIndex, mapPath.c_str());
+        std::string imagePath, classId, sequenceKey;
+        // Try to parse sequence id, file path and label.
+        if (!std::getline(ss, sequenceKey, '\t') || !std::getline(ss, imagePath, '\t') || !std::getline(ss, classId, '\t'))
+        {
+            // In case when the sequence key is not specified we set it to the line number inside the mapping file.
+            // Assume that only image path and class label is given (old format).
+            classId = imagePath;
+            imagePath = sequenceKey;
+            sequenceKey = std::to_string(lineIndex);
+
+            if (classId.empty() || imagePath.empty())
+                RuntimeError("Invalid map file format, must contain 2 or 3 tab-delimited columns, line %" PRIu64 " in file %s.", lineIndex, mapPath.c_str());
+        }
+
+        // Skipping sequences that are not included in corpus.
+        auto key = msra::strfun::utf16(sequenceKey);
+        if (!corpus->IsIncluded(key))
+        {
+            continue;
+        }

        char* eptr;
        errno = 0;
@ -216,9 +288,10 @@ void ImageDataDeserializer::CreateSequenceDescriptions(std::string mapPath, size
            description.m_chunkId = curId;
            description.m_path = imagePath;
            description.m_classId = cid;
-            description.m_key.m_sequence = description.m_id;
+            description.m_key.m_sequence = stringRegistry[key];
            description.m_key.m_sample = 0;

+            m_keyToSequence[description.m_key.m_sequence] = m_imageSequences.size();
            m_imageSequences.push_back(description);
            RegisterByteReader(description.m_id, description.m_path, knownReaders);
        }
@ -280,8 +353,24 @@ cv::Mat ImageDataDeserializer::ReadImage(size_t seqId, const std::string& path,

 cv::Mat FileByteReader::Read(size_t, const std::string& path, bool grayscale)
 {
-	assert(!path.empty());
+    assert(!path.empty());

    return cv::imread(path, grayscale ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
 }
+
+// Returned for keys this deserializer cannot resolve.
+static SequenceDescription s_invalidSequence{0, 0, 0, false};
+
+// Looks up a sequence description by its logical key and copies it into 'result'.
+// Keys with a non-zero sample index, or whose sequence id is absent from m_keyToSequence,
+// yield s_invalidSequence instead.
+void ImageDataDeserializer::GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& result)
+{
+    const auto entry = m_keyToSequence.find(key.m_sequence);
+    if (key.m_sample == 0 && entry != m_keyToSequence.end())
+    {
+        // Known sequence: the map stores its position inside m_imageSequences.
+        result = m_imageSequences[entry->second];
+        return;
+    }
+
+    result = s_invalidSequence;
+}
+
 }}}
--- a/Source/Readers/ImageReader/ImageDataDeserializer.h
+++ b/Source/Readers/ImageReader/ImageDataDeserializer.h
@ -8,8 +8,8 @@
 #include "DataDeserializerBase.h"
 #include "Config.h"
 #include "ByteReader.h"
-#include "ImageConfigHelper.h"
 #include <unordered_map>
+#include "CorpusDescriptor.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -21,6 +21,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 class ImageDataDeserializer : public DataDeserializerBase
 {
 public:
+    // A new constructor to support new compositional configuration,
+    // that allows composition of deserializers and transforms on inputs.
+    ImageDataDeserializer(CorpusDescriptorPtr corpus, const ConfigParameters& config);
+
+    // TODO: This constructor should be deprecated in the future. Compositional config should be used instead.
    explicit ImageDataDeserializer(const ConfigParameters& config);

    // Gets sequences by specified ids. Order of returned sequences corresponds to the order of provided ids.
@ -32,9 +37,12 @@ public:
    // Gets sequence descriptions for the chunk.
    virtual void GetSequencesForChunk(size_t, std::vector<SequenceDescription>&) override;

+    // Gets sequence description by key.
+    void GetSequenceDescriptionByKey(const KeyType&, SequenceDescription&) override;
+
 private:
    // Creates a set of sequence descriptions.
-    void CreateSequenceDescriptions(std::string mapPath, size_t labelDimension, const ImageConfigHelper& config);
+    void CreateSequenceDescriptions(CorpusDescriptorPtr corpus, std::string mapPath, size_t labelDimension, bool isMultiCrop);

    // Image sequence descriptions. Currently, a sequence contains a single sample only.
    struct ImageSequenceDescription : public SequenceDescription
@ -53,6 +61,9 @@ private:
    // Sequence descriptions for all input data.
    std::vector<ImageSequenceDescription> m_imageSequences;

+    // Mapping of logical sequence key into sequence description.
+    std::map<size_t, size_t> m_keyToSequence;
+
    // Element type of the feature/label stream (currently float/double only).
    ElementType m_featureElementType;

--- a/Source/Readers/ImageReader/ImageReader.cpp
+++ b/Source/Readers/ImageReader/ImageReader.cpp
@ -13,9 +13,13 @@
 #include "ImageDataDeserializer.h"
 #include "FramePacker.h"
 #include <omp.h>
+#include "TransformController.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

+// TODO: This class should go away eventually.
+// TODO: The composition of packer + randomizer + different deserializers in a generic manner is done in the CompositeDataReader.
+// TODO: Currently preserving this for backward compatibility with current configs.
 ImageReader::ImageReader(MemoryProviderPtr provider,
                         const ConfigParameters& config)
    : m_seed(0), m_provider(provider)
@ -37,7 +41,7 @@ ImageReader::ImageReader(MemoryProviderPtr provider,

    auto deserializer = std::make_shared<ImageDataDeserializer>(config);

-    TransformerPtr randomizer;
+    SequenceEnumeratorPtr randomizer;
    // Request multi-threaded randomizer operation to speed up CPU-intensive image-decoding and transformations.
    const bool multithreadedGetNextSequences = true;
    if (configHelper.ShouldRandomize())
@ -51,35 +55,27 @@ ImageReader::ImageReader(MemoryProviderPtr provider,
        randomizer = std::make_shared<NoRandomizer>(deserializer, multithreadedGetNextSequences);
    }

-    randomizer->Initialize(nullptr, config);
+    // Create transformations for a single feature stream.
+    std::wstring featureName = m_streams[configHelper.GetFeatureStreamId()]->m_name;
+    ConfigParameters featureStream = config(featureName);

-    auto cropper = std::make_shared<CropTransformer>();
-    cropper->Initialize(randomizer, config);
+    std::vector<Transformation> transformations;
+    transformations.push_back(Transformation{ std::make_shared<CropTransformer>(featureStream), featureName });
+    transformations.push_back(Transformation{ std::make_shared<ScaleTransformer>(featureStream), featureName });
+    transformations.push_back(Transformation{ std::make_shared<ColorTransformer>(featureStream), featureName });
+    transformations.push_back(Transformation{ std::make_shared<IntensityTransformer>(featureStream), featureName });
+    transformations.push_back(Transformation{ std::make_shared<MeanTransformer>(featureStream), featureName });

-    auto scaler = std::make_shared<ScaleTransformer>();
-    scaler->Initialize(cropper, config);
-
-    auto color = std::make_shared<ColorTransformer>();
-    color->Initialize(scaler, config);
-
-    auto intensity = std::make_shared<IntensityTransformer>();
-    intensity->Initialize(color, config);
-
-    auto mean = std::make_shared<MeanTransformer>();
-    mean->Initialize(intensity, config);
-
-    TransformerPtr last = mean;
    if (configHelper.GetDataFormat() == CHW)
    {
-        last = std::make_shared<TransposeTransformer>();
-        last->Initialize(mean, config);
+        transformations.push_back(Transformation{ std::make_shared<TransposeTransformer>(featureStream), featureName });
    }

-    m_transformer = last;
+    m_sequenceEnumerator = std::make_shared<TransformController>(transformations, randomizer);

    m_packer = std::make_shared<FramePacker>(
        m_provider,
-        m_transformer,
+        m_sequenceEnumerator,
        m_streams);
 }

@ -96,7 +92,7 @@ void ImageReader::StartEpoch(const EpochConfiguration& config)
        RuntimeError("Epoch size cannot be 0.");
    }

-    m_transformer->StartEpoch(config);
+    m_sequenceEnumerator->StartEpoch(config);
    m_packer->StartEpoch(config);
 }

--- a/Source/Readers/ImageReader/ImageReader.h
+++ b/Source/Readers/ImageReader/ImageReader.h
@ -6,8 +6,8 @@
 #pragma once

 #include "Reader.h"
-#include "ImageTransformers.h"
 #include "Packer.h"
+#include "SequenceEnumerator.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -34,7 +34,7 @@ private:
    std::vector<StreamDescriptionPtr> m_streams;

    // A head transformer in a list of transformers.
-    TransformerPtr m_transformer;
+    SequenceEnumeratorPtr m_sequenceEnumerator;

    // Packer.
    PackerPtr m_packer;
--- a/Source/Readers/ImageReader/ImageReader.vcxproj.filters
+++ b/Source/Readers/ImageReader/ImageReader.vcxproj.filters
@ -3,23 +3,11 @@
  <ItemGroup>
    <ClCompile Include="Exports.cpp" />
    <ClCompile Include="stdafx.cpp" />
-    <ClCompile Include="..\..\Common\fileutil.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\Common\File.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
    <ClCompile Include="dllmain.cpp" />
    <ClCompile Include="ImageTransformers.cpp" />
    <ClCompile Include="ImageDataDeserializer.cpp" />
    <ClCompile Include="ImageReader.cpp" />
    <ClCompile Include="ImageConfigHelper.cpp" />
-    <ClCompile Include="..\..\Common\Config.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp">
-      <Filter>Common</Filter>
-    </ClCompile>
    <ClCompile Include="ZipByteReader.cpp" />
  </ItemGroup>
  <ItemGroup>
--- a/Source/Readers/ImageReader/ImageTransformers.cpp
+++ b/Source/Readers/ImageReader/ImageTransformers.cpp
@ -10,11 +10,10 @@
 #include "ImageTransformers.h"
 #include "Config.h"
 #include "ConcStack.h"
-#include "ImageConfigHelper.h"
 #include "StringUtil.h"
 #include "ElementTypeUtils.h"

-namespace Microsoft { namespace MSR { namespace CNTK
+namespace Microsoft { namespace MSR { namespace CNTK 
 {

 struct ImageSequenceData : DenseSequenceData
@ -24,53 +23,51 @@ struct ImageSequenceData : DenseSequenceData
    SequenceDataPtr m_original;
 };

-void ImageTransformerBase::Initialize(TransformerPtr next,
-                                      const ConfigParameters &readerConfig)
+ImageTransformerBase::ImageTransformerBase(const ConfigParameters& readerConfig) : m_imageElementType(0)
 {
-    Base::Initialize(next, readerConfig);
-    m_seed = readerConfig(L"seed", (unsigned int)0);
-
-    ImageConfigHelper config(readerConfig);
-    size_t featureStreamId = config.GetFeatureStreamId();
-    m_appliedStreamIds.push_back(featureStreamId);
-    if (m_appliedStreamIds.size() != 1)
-    {
-        RuntimeError("Only a single feature stream is supported.");
-    }
-
-    const auto &inputStreams = GetInputStreams();
-    m_outputStreams.resize(inputStreams.size());
-    std::copy(inputStreams.begin(), inputStreams.end(), m_outputStreams.begin());
+    m_seed = readerConfig(L"seed", 0u);
 }

-SequenceDataPtr
-ImageTransformerBase::Apply(SequenceDataPtr sequence,
-                            const StreamDescription &inputStream,
-                            const StreamDescription & /*outputStream*/)
+// Describes how the input stream is transformed into the output stream. Called once per applied stream.
+// Currently, image transformations support only dense streams of type double or float.
+StreamDescription ImageTransformerBase::Transform(const StreamDescription& inputStream)
 {
-    assert(inputStream.m_storageType == StorageType::dense);
-    auto inputSequence = static_cast<const DenseSequenceData&>(*sequence.get());
-    ImageDimensions dimensions(*inputSequence.m_sampleLayout, HWC);
-    int columns = static_cast<int>(dimensions.m_width);
-    int rows = static_cast<int>(dimensions.m_height);
-    int channels = static_cast<int>(dimensions.m_numChannels);
+    m_inputStream = inputStream;
+    m_outputStream = m_inputStream;

-    int typeId = 0;
-    if (inputStream.m_elementType == ElementType::tdouble)
+    if (m_inputStream.m_storageType != StorageType::dense)
    {
-        typeId = CV_64F;
+        LogicError("ImageTransformerBase supports only dense input streams.");
    }
-    else if (inputStream.m_elementType == ElementType::tfloat)
+
+    if (m_inputStream.m_elementType == ElementType::tdouble)
    {
-        typeId = CV_32F;
+        m_imageElementType = CV_64F;
+    }
+    else if (m_inputStream.m_elementType == ElementType::tfloat)
+    {
+        m_imageElementType = CV_32F;
    }
    else
    {
        RuntimeError("Unsupported type");
    }

+    return m_outputStream;
+}
+
+// Transforms a single sequence by wrapping its dense data as an OpenCV matrix (cv::Mat). Called once per sequence.
+SequenceDataPtr ImageTransformerBase::Transform(SequenceDataPtr sequence)
+{
+    auto inputSequence = static_cast<const DenseSequenceData&>(*sequence);
+
+    ImageDimensions dimensions(*inputSequence.m_sampleLayout, HWC);
+    int columns = static_cast<int>(dimensions.m_width);
+    int rows = static_cast<int>(dimensions.m_height);
+    int channels = static_cast<int>(dimensions.m_numChannels);
+
    auto result = std::make_shared<ImageSequenceData>();
-    int type = CV_MAKETYPE(typeId, channels);
+    int type = CV_MAKETYPE(m_imageElementType, channels);
    cv::Mat buffer = cv::Mat(rows, columns, type, inputSequence.m_data);
    Apply(sequence->m_id, buffer);
    if (!buffer.isContinuous())
@ -92,19 +89,8 @@ ImageTransformerBase::Apply(SequenceDataPtr sequence,
 }

 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void CropTransformer::Initialize(TransformerPtr next,
-                                 const ConfigParameters &readerConfig)
+CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransformerBase(config)
 {
-    ImageTransformerBase::Initialize(next, readerConfig);
-    auto featureStreamIds = GetAppliedStreamIds();
-    InitFromConfig(readerConfig(GetInputStreams()[featureStreamIds[0]]->m_name));
-}
-
-void CropTransformer::InitFromConfig(const ConfigParameters &config)
-{
-    m_cropType = ParseCropType(config(L"cropType", ""));
-
    floatargvector cropRatio = config(L"cropRatio", "1.0");
    m_cropRatioMin = cropRatio[0];
    m_cropRatioMax = cropRatio[1];
@ -119,6 +105,8 @@ void CropTransformer::InitFromConfig(const ConfigParameters &config)

    m_jitterType = ParseJitterType(config(L"jitterType", ""));

+    m_cropType = ImageConfigHelper::ParseCropType(config(L"cropType", ""));
+
    if (!config.ExistsCurrent(L"hflip"))
    {
        m_hFlip = m_cropType == CropType::Random;
@ -136,7 +124,6 @@ void CropTransformer::StartEpoch(const EpochConfiguration &config)
    m_curAspectRatioRadius = m_aspectRatioRadius[config.m_epochIndex];
    if (!(0 <= m_curAspectRatioRadius && m_curAspectRatioRadius <= 1.0))
        InvalidArgument("aspectRatioRadius must be >= 0.0 and <= 1.0");
-
    ImageTransformerBase::StartEpoch(config);
 }

@ -178,27 +165,6 @@ void CropTransformer::Apply(size_t id, cv::Mat &mat)
    m_rngs.push(std::move(rng));
 }

-CropTransformer::CropType
-CropTransformer::ParseCropType(const std::string &src)
-{
-    if (src.empty() || AreEqualIgnoreCase(src, "center"))
-    {
-        return CropType::Center;
-    }
-
-    if (AreEqualIgnoreCase(src, "random"))
-    {
-        return CropType::Random;
-    }
-
-    if (AreEqualIgnoreCase(src, "multiview10"))
-    {
-        return CropType::MultiView10;
-    }
-
-    RuntimeError("Invalid crop type: %s.", src.c_str());
-}
-
 CropTransformer::RatioJitterType
 CropTransformer::ParseJitterType(const std::string &src)
 {
@ -226,7 +192,7 @@ CropTransformer::ParseJitterType(const std::string &src)
 }

 cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, int ccol,
-                                      double cropRatio, std::mt19937 &rng)
+                                          double cropRatio, std::mt19937 &rng)
 {
    assert(crow > 0);
    assert(ccol > 0);
@ -278,27 +244,27 @@ cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, in
        int isubView = viewIndex % 5;
        switch (isubView)
        {
-        // top-left
+            // top-left
        case 0:
            xOff = 0;
            yOff = 0;
            break;
-        // top-right
+            // top-right
        case 1:
            xOff = ccol - cropSizeX;
            yOff = 0;
            break;
-        // bottom-left
+            // bottom-left
        case 2:
            xOff = 0;
            yOff = crow - cropSizeY;
            break;
-        // bottom-right
+            // bottom-right
        case 3:
            xOff = ccol - cropSizeX;
            yOff = crow - cropSizeY;
            break;
-        // center
+            // center
        case 4:
            xOff = (ccol - cropSizeX) / 2;
            yOff = (crow - cropSizeY) / 2;
@ -317,24 +283,13 @@ cv::Rect CropTransformer::GetCropRect(CropType type, int viewIndex, int crow, in

 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

-void ScaleTransformer::Initialize(TransformerPtr next,
-                                  const ConfigParameters &readerConfig)
+ScaleTransformer::ScaleTransformer(const ConfigParameters& config) : ImageTransformerBase(config)
 {
-    ImageTransformerBase::Initialize(next, readerConfig);
    m_interpMap.emplace("nearest", cv::INTER_NEAREST);
    m_interpMap.emplace("linear", cv::INTER_LINEAR);
    m_interpMap.emplace("cubic", cv::INTER_CUBIC);
    m_interpMap.emplace("lanczos", cv::INTER_LANCZOS4);

-    auto featureStreamIds = GetAppliedStreamIds();
-    const auto &feature = GetInputStreams()[featureStreamIds[0]];
-    m_dataType = feature->m_elementType == ElementType::tfloat ? CV_32F : CV_64F;
-
-    InitFromConfig(readerConfig(feature->m_name));
-}
-
-void ScaleTransformer::InitFromConfig(const ConfigParameters &config)
-{
    m_imgWidth = config(L"width");
    m_imgHeight = config(L"height");
    m_imgChannels = config(L"channels");
@ -359,15 +314,24 @@ void ScaleTransformer::InitFromConfig(const ConfigParameters &config)
        m_interp.push_back(cv::INTER_LINEAR);
 }

+// Describes how the input stream is transformed into the output stream. Called once per applied stream.
+// The scale transformer fixes the output sample layout to the configured width, height and channels (HWC), so that all samples are of the same size.
+StreamDescription ScaleTransformer::Transform(const StreamDescription& inputStream)
+{
+    ImageTransformerBase::Transform(inputStream);
+    m_outputStream.m_sampleLayout = std::make_shared<TensorShape>(ImageDimensions(m_imgWidth, m_imgHeight, m_imgChannels).AsTensorShape(HWC));
+    return m_outputStream;
+}
+
 void ScaleTransformer::Apply(size_t id, cv::Mat &mat)
 {
    UNUSED(id);
+
    // If matrix has not been converted to the right type, do it now as rescaling
    // requires floating point type.
-    //
-    if (mat.type() != CV_MAKETYPE(m_dataType, m_imgChannels))
+    if (mat.type() != CV_MAKETYPE(m_imageElementType, m_imgChannels))
    {
-        mat.convertTo(mat, m_dataType);
+        mat.convertTo(mat, m_imageElementType);
    }

    auto seed = GetSeed();
@ -375,7 +339,6 @@ void ScaleTransformer::Apply(size_t id, cv::Mat &mat)

    auto index = UniIntT(0, static_cast<int>(m_interp.size()) - 1)(*rng);
    assert(m_interp.size() > 0);
-
    cv::resize(mat, mat, cv::Size((int)m_imgWidth, (int)m_imgHeight), 0, 0, m_interp[index]);

    m_rngs.push(std::move(rng));
@ -383,16 +346,7 @@ void ScaleTransformer::Apply(size_t id, cv::Mat &mat)

 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

-void MeanTransformer::Initialize(TransformerPtr next,
-                                 const ConfigParameters &readerConfig)
-{
-    ImageTransformerBase::Initialize(next, readerConfig);
-
-    auto featureStreamIds = GetAppliedStreamIds();
-    InitFromConfig(readerConfig(GetInputStreams()[featureStreamIds[0]]->m_name));
-}
-
-void MeanTransformer::InitFromConfig(const ConfigParameters &config)
+MeanTransformer::MeanTransformer(const ConfigParameters& config) : ImageTransformerBase(config)
 {
    std::wstring meanFile = config(L"meanFile", L"");
    if (meanFile.empty())
@ -425,7 +379,7 @@ void MeanTransformer::Apply(size_t id, cv::Mat &mat)
    UNUSED(id);
    assert(m_meanImg.size() == cv::Size(0, 0) ||
           (m_meanImg.size() == mat.size() &&
-            m_meanImg.channels() == mat.channels()));
+           m_meanImg.channels() == mat.channels()));

    // REVIEW alexeyk: check type conversion (float/double).
    if (m_meanImg.size() == mat.size())
@ -434,48 +388,34 @@ void MeanTransformer::Apply(size_t id, cv::Mat &mat)
    }
 }

-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void TransposeTransformer::Initialize(TransformerPtr next,
-                                      const ConfigParameters &readerConfig)
+// Describes how the input stream is transformed into the output stream. Called once per applied stream.
+// The transpose transformer expects a dense input stream with samples laid out as HWC and outputs CHW.
+StreamDescription TransposeTransformer::Transform(const StreamDescription& inputStream)
 {
-    TransformerBase::Initialize(next, readerConfig);
-
-    // Currently we only support a single stream.
-    ImageConfigHelper config(readerConfig);
-    size_t featureStreamId = config.GetFeatureStreamId();
-    m_appliedStreamIds.push_back(featureStreamId);
-
-    const auto &inputStreams = GetInputStreams();
-    m_outputStreams.resize(inputStreams.size());
-    std::copy(inputStreams.begin(), inputStreams.end(), m_outputStreams.begin());
-
-    for (auto id : m_appliedStreamIds)
+    m_inputStream = inputStream;
+    if (m_inputStream.m_storageType != StorageType::dense)
    {
-        auto &stream = inputStreams[id];
-
-        ImageDimensions dimensions(*stream->m_sampleLayout, HWC);
-
-        // Changing from NHWC to NCHW (note: row-major notation)
-        auto changedStream = std::make_shared<StreamDescription>(*stream);
-        changedStream->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(CHW));
-        m_outputStreams[id] = changedStream;
+        LogicError("Transpose transformer supports only dense streams.");
    }
+
+    // Changing from NHWC to NCHW
+    ImageDimensions dimensions(*m_inputStream.m_sampleLayout, HWC);
+    m_outputStream = m_inputStream;
+    m_outputStream.m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(CHW));
+    return m_outputStream;
 }

-SequenceDataPtr
-TransposeTransformer::Apply(SequenceDataPtr inputSequence,
-                            const StreamDescription &inputStream,
-                            const StreamDescription &outputStream)
+// Transformation of the sequence.
+SequenceDataPtr TransposeTransformer::Transform(SequenceDataPtr sequence)
 {
-    if (inputStream.m_elementType == ElementType::tdouble)
+    if (m_inputStream.m_elementType == ElementType::tdouble)
    {
-        return TypedApply<double>(inputSequence, inputStream, outputStream);
+        return TypedTransform<double>(sequence);
    }

-    if (inputStream.m_elementType == ElementType::tfloat)
+    if (m_inputStream.m_elementType == ElementType::tfloat)
    {
-        return TypedApply<float>(inputSequence, inputStream, outputStream);
+        return TypedTransform<float>(sequence);
    }

    RuntimeError("Unsupported type");
@ -490,21 +430,17 @@ struct DenseSequenceWithBuffer : DenseSequenceData
 };

 template <class TElemType>
-SequenceDataPtr TransposeTransformer::TypedApply(SequenceDataPtr sequence,
-                                                 const StreamDescription &inputStream,
-                                                 const StreamDescription &outputStream)
+SequenceDataPtr TransposeTransformer::TypedTransform(SequenceDataPtr sequence)
 {
-    assert(inputStream.m_storageType == StorageType::dense);
-    auto inputSequence = static_cast<DenseSequenceData&>(*sequence.get());
+    auto inputSequence = static_cast<DenseSequenceData&>(*sequence);
    assert(inputSequence.m_numberOfSamples == 1);
-    assert(inputStream.m_sampleLayout->GetNumElements() == outputStream.m_sampleLayout->GetNumElements());

-    size_t count = inputStream.m_sampleLayout->GetNumElements() * GetSizeByType(inputStream.m_elementType);
+    size_t count = m_inputStream.m_sampleLayout->GetNumElements() * GetSizeByType(m_inputStream.m_elementType);

    auto result = std::make_shared<DenseSequenceWithBuffer>();
    result->m_buffer.resize(count);

-    ImageDimensions dimensions(*inputStream.m_sampleLayout, ImageLayoutKind::HWC);
+    ImageDimensions dimensions(*m_inputStream.m_sampleLayout, ImageLayoutKind::HWC);
    size_t rowCount = dimensions.m_height * dimensions.m_width;
    size_t channelCount = dimensions.m_numChannels;

@ -519,7 +455,7 @@ SequenceDataPtr TransposeTransformer::TypedApply(SequenceDataPtr sequence,
        }
    }

-    result->m_sampleLayout = outputStream.m_sampleLayout;
+    result->m_sampleLayout = m_outputStream.m_sampleLayout;
    result->m_data = result->m_buffer.data();
    result->m_numberOfSamples = inputSequence.m_numberOfSamples;
    return result;
@ -527,16 +463,7 @@ SequenceDataPtr TransposeTransformer::TypedApply(SequenceDataPtr sequence,

 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

-void IntensityTransformer::Initialize(TransformerPtr next,
-                                 const ConfigParameters &readerConfig)
-{
-    ImageTransformerBase::Initialize(next, readerConfig);
-
-    auto featureStreamIds = GetAppliedStreamIds();
-    InitFromConfig(readerConfig(GetInputStreams()[featureStreamIds[0]]->m_name));
-}
-
-void IntensityTransformer::InitFromConfig(const ConfigParameters &config)
+IntensityTransformer::IntensityTransformer(const ConfigParameters &config) : ImageTransformerBase(config)
 {
    m_stdDev = config(L"intensityStdDev", ConfigParameters::Array(doubleargvector(vector<double>{0.0})));
    std::wstring intFile = config(L"intensityFile", L"");
@ -564,7 +491,6 @@ void IntensityTransformer::InitFromConfig(const ConfigParameters &config)
 void IntensityTransformer::StartEpoch(const EpochConfiguration &config)
 {
    m_curStdDev = m_stdDev[config.m_epochIndex];
-
    ImageTransformerBase::StartEpoch(config);
 }

@ -618,15 +544,7 @@ void IntensityTransformer::Apply(cv::Mat &mat)

 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

-void ColorTransformer::Initialize(TransformerPtr next, const ConfigParameters &readerConfig)
-{
-    ImageTransformerBase::Initialize(next, readerConfig);
-
-    auto featureStreamIds = GetAppliedStreamIds();
-    InitFromConfig(readerConfig(GetInputStreams()[featureStreamIds[0]]->m_name));
-}
-
-void ColorTransformer::InitFromConfig(const ConfigParameters &config)
+ColorTransformer::ColorTransformer(const ConfigParameters &config) : ImageTransformerBase(config)
 {
    m_brightnessRadius = config(L"brightnessRadius", ConfigParameters::Array(doubleargvector(vector<double>{0.0})));
    m_contrastRadius = config(L"contrastRadius", ConfigParameters::Array(doubleargvector(vector<double>{0.0})));
--- a/Source/Readers/ImageReader/ImageTransformers.h
+++ b/Source/Readers/ImageReader/ImageTransformers.h
@ -11,8 +11,8 @@

 #include "Transformer.h"
 #include "ConcStack.h"
-#include "TransformerBase.h"
 #include "Config.h"
+#include "ImageConfigHelper.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -20,45 +20,39 @@ class ConfigParameters;

 // Base class for image transformations based on OpenCV
 // that helps to wrap the sequences into OpenCV::Mat class.
-class ImageTransformerBase : public TransformerBase
+class ImageTransformerBase : public Transformer
 {
 public:
-    // Initializes the transformer.
-    void Initialize(TransformerPtr next, const ConfigParameters &readerConfig) override;
+    explicit ImageTransformerBase(const ConfigParameters& config);
+
+    void StartEpoch(const EpochConfiguration&) override {}
+
+    // Transformation of the stream.
+    StreamDescription Transform(const StreamDescription& inputStream) override;
+
+    // Transformation of the sequence.
+    SequenceDataPtr Transform(SequenceDataPtr sequence) override;

 protected:
-    const std::vector<StreamId> &GetAppliedStreamIds() const override
-    {
-        return m_appliedStreamIds;
-    }
-
-    const std::vector<StreamDescriptionPtr>& GetOutputStreams() const override
-    {
-        return m_outputStreams;
-    }
-
    // Seed  getter.
    unsigned int GetSeed() const
    {
        return m_seed;
    }

-    using Base = TransformerBase;
+    using Base = Transformer;
    using UniRealT = std::uniform_real_distribution<double>;
    using UniIntT = std::uniform_int_distribution<int>;

-    // Applies transformation to the sequence.
-    SequenceDataPtr Apply(SequenceDataPtr inputSequence,
-                          const StreamDescription &inputStream,
-                          const StreamDescription &outputStream) override;
-
    // The only function that should be redefined by the inherited classes.
    virtual void Apply(size_t id, cv::Mat &from) = 0;

-private:
-    std::vector<StreamDescriptionPtr> m_outputStreams;
-    std::vector<StreamId> m_appliedStreamIds;
+protected:
+    StreamDescription m_inputStream;
+    StreamDescription m_outputStream;
    unsigned int m_seed;
+    int m_imageElementType;
+    conc_stack<std::unique_ptr<std::mt19937>> m_rngs;
 };

 // Crop transformation of the image.
@ -66,18 +60,12 @@ private:
 class CropTransformer : public ImageTransformerBase
 {
 public:
-    void Initialize(TransformerPtr next, const ConfigParameters &readerConfig) override;
+    explicit CropTransformer(const ConfigParameters& config);

 private:
    void Apply(size_t id, cv::Mat &mat) override;

 private:
-    enum class CropType
-    {
-        Center = 0,
-        Random = 1,
-        MultiView10 = 2
-    };
    enum class RatioJitterType
    {
        None = 0,
@ -86,11 +74,8 @@ private:
        UniArea = 3
    };

-    void InitFromConfig(const ConfigParameters &config);
-
    void StartEpoch(const EpochConfiguration &config) override;

-    CropType ParseCropType(const std::string &src);
    RatioJitterType ParseJitterType(const std::string &src);
    cv::Rect GetCropRect(CropType type, int viewIndex, int crow, int ccol, double cropRatio, std::mt19937 &rng);

@ -109,11 +94,11 @@ private:
 class ScaleTransformer : public ImageTransformerBase
 {
 public:
-    void Initialize(TransformerPtr next,
-                            const ConfigParameters &readerConfig) override;
+    explicit ScaleTransformer(const ConfigParameters& config);
+
+    StreamDescription Transform(const StreamDescription& inputStream) override;

 private:
-    void InitFromConfig(const ConfigParameters &config);
    void Apply(size_t id, cv::Mat &mat) override;

    using StrToIntMapT = std::unordered_map<std::string, int>;
@ -121,7 +106,6 @@ private:
    std::vector<int> m_interp;

    conc_stack<std::unique_ptr<std::mt19937>> m_rngs;
-    int m_dataType;
    size_t m_imgWidth;
    size_t m_imgHeight;
    size_t m_imgChannels;
@ -131,45 +115,34 @@ private:
 class MeanTransformer : public ImageTransformerBase
 {
 public:
-    void Initialize(TransformerPtr next,
-                            const ConfigParameters &readerConfig) override;
+    explicit MeanTransformer(const ConfigParameters& config);

 private:
    void Apply(size_t id, cv::Mat &mat) override;
-    void InitFromConfig(const ConfigParameters &config);

    cv::Mat m_meanImg;
 };

 // Transpose transformation from HWC to CHW (note: row-major notation).
-class TransposeTransformer : public TransformerBase
+class TransposeTransformer : public Transformer
 {
 public:
-    void Initialize(TransformerPtr next, const ConfigParameters &readerConfig) override;
+    explicit TransposeTransformer(const ConfigParameters&) {}

-protected:
-    const std::vector<StreamId>& GetAppliedStreamIds() const override
-    {
-        return m_appliedStreamIds;
-    }
+    void StartEpoch(const EpochConfiguration&) override {}

-    const std::vector<StreamDescriptionPtr>& GetOutputStreams() const override
-    {
-        return m_outputStreams;
-    }
+    // Transformation of the stream.
+    StreamDescription Transform(const StreamDescription& inputStream) override;

-    SequenceDataPtr Apply(SequenceDataPtr inputSequence,
-                          const StreamDescription &inputStream,
-                          const StreamDescription &outputStream) override;
+    // Transformation of the sequence.
+    SequenceDataPtr Transform(SequenceDataPtr sequence) override;

 private:
    template <class TElement>
-    SequenceDataPtr TypedApply(SequenceDataPtr inputSequence,
-                               const StreamDescription &inputStream,
-                               const StreamDescription &outputStream);
+    SequenceDataPtr TypedTransform(SequenceDataPtr inputSequence);

-    std::vector<StreamDescriptionPtr> m_outputStreams;
-    std::vector<StreamId> m_appliedStreamIds;
+    StreamDescription m_inputStream;
+    StreamDescription m_outputStream;
 };

 // Intensity jittering based on PCA transform as described in original AlexNet paper
@ -180,11 +153,9 @@ private:
 class IntensityTransformer : public ImageTransformerBase
 {
 public:
-    void Initialize(TransformerPtr next, const ConfigParameters &readerConfig) override;
+    explicit IntensityTransformer(const ConfigParameters& config);

 private:
-    void InitFromConfig(const ConfigParameters &config);
-
    void StartEpoch(const EpochConfiguration &config) override;

    void Apply(size_t id, cv::Mat &mat) override;
@ -205,11 +176,9 @@ private:
 class ColorTransformer : public ImageTransformerBase
 {
 public:
-    void Initialize(TransformerPtr next, const ConfigParameters &readerConfig) override;
+    explicit ColorTransformer(const ConfigParameters& config);

 private:
-    void InitFromConfig(const ConfigParameters &config);
-
    void StartEpoch(const EpochConfiguration &config) override;

    void Apply(size_t id, cv::Mat &mat) override;
--- a/Source/Readers/ReaderLib/BlockRandomizer.h
+++ b/Source/Readers/ReaderLib/BlockRandomizer.h
@ -7,7 +7,7 @@

 #include <vector>

-#include "Transformer.h"
+#include "SequenceEnumerator.h"
 #include "DataDeserializer.h"
 #include "ChunkRandomizer.h"
 #include "SequenceRandomizer.h"
@ -31,8 +31,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 // This class is responsible for decimation and loading the data chunks in to memory.
 // Actual randomization happens in ChunkRandomizer and SequenceRandomizer.
 // TODO: The behavior can be simplified by only randomizing sequences forward.
-// TODO: The layering will be changed, when we move transformers under the randomizer, it won't be a transformer anymore.
-class BlockRandomizer : public Transformer
+class BlockRandomizer : public SequenceEnumerator
 {
 public:
    // Currently, decimation based on sequences or chunks is supported.
@ -50,8 +49,6 @@ public:
        bool useLegacyRandomization = false,
        bool multithreadedGetNextSequences = false);

-    virtual void Initialize(TransformerPtr, const ConfigParameters&) override {};
-
    // Starts a new epoch.
    virtual void StartEpoch(const EpochConfiguration& config) override;

--- a/Source/Readers/ReaderLib/Bundler.cpp
+++ b/Source/Readers/ReaderLib/Bundler.cpp
@ -68,6 +68,8 @@ void Bundler::CreateChunkDescriptions()
        return;
    }

+    m_takePrimarySequenceLength = true;
+
    // Otherwise build bundling chunks using underlying deserializers.
    std::vector<SequenceDescription> sequenceDescriptions;
    sequenceDescriptions.reserve(chunks.front()->m_numberOfSequences);
@ -85,6 +87,7 @@ void Bundler::CreateChunkDescriptions()
        {
            auto sequence = sequenceDescriptions[sequenceIndex];
            bool isValid = true;
+            size_t sequenceSamples = sequence.m_numberOfSamples;
            for (size_t deserializerIndex = 1; deserializerIndex < m_deserializers.size(); ++deserializerIndex)
            {
                m_deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequenceDescriptions[sequenceIndex].m_key, s);
@ -94,12 +97,18 @@ void Bundler::CreateChunkDescriptions()
                    invalid.insert(sequenceIndex);
                    break;
                }
+
+                sequenceSamples = std::max(sequenceSamples, s.m_numberOfSamples);
            }

            if (isValid)
            {
-                numberOfSamples += sequence.m_numberOfSamples;
+                numberOfSamples += sequenceSamples;
                numberOfSequences++;
+
+                // Check whether the primary stream has the longest sequence.
+                // If yes, we can optimize exposed sequence descriptions in GetSequencesForChunk.
+                m_takePrimarySequenceLength = m_takePrimarySequenceLength && (sequenceSamples == sequence.m_numberOfSamples);
            }
        }

@ -130,24 +139,52 @@ void Bundler::GetSequencesForChunk(size_t chunkId, std::vector<SequenceDescripti
    ChunkDescriptionPtr original = chunk->m_original;
    m_driver->GetSequencesForChunk(original->m_id, sequences);

-    // Can return because all sequences are clean.
-    if (chunk->m_invalid.empty())
-    {
-        return;
-    }
-
-    // Do cleansing.
    std::vector<SequenceDescription> result;
-    result.reserve(sequences.size());
-    for (size_t sequenceIndex = 0; sequenceIndex < sequences.size(); ++sequenceIndex)
+    if (m_takePrimarySequenceLength) // No need to consult other deserializers.
    {
-        if (chunk->m_invalid.find(sequenceIndex) != chunk->m_invalid.end())
+        // Can return because all sequences are clean.
+        if (chunk->m_invalid.empty())
        {
-            continue;
+            return;
        }

-        result.push_back(sequences[sequenceIndex]);
+        // Do cleansing.
+        result.reserve(sequences.size());
+        for (size_t sequenceIndex = 0; sequenceIndex < sequences.size(); ++sequenceIndex)
+        {
+            if (chunk->m_invalid.find(sequenceIndex) != chunk->m_invalid.end())
+            {
+                continue;
+            }
+
+            result.push_back(sequences[sequenceIndex]);
+        }
    }
+    else // need to get the max sequence length from other deserializers.
+         // TODO: This will change when the sequence length is exposed per stream.
+    {
+        result.reserve(sequences.size());
+        SequenceDescription s;
+        for (size_t sequenceIndex = 0; sequenceIndex < sequences.size(); ++sequenceIndex)
+        {
+            if (chunk->m_invalid.find(sequenceIndex) != chunk->m_invalid.end())
+            {
+                continue;
+            }
+
+            auto sequence = sequences[sequenceIndex];
+            size_t sequenceSamples = sequence.m_numberOfSamples;
+            for (size_t deserializerIndex = 1; deserializerIndex < m_deserializers.size(); ++deserializerIndex)
+            {
+                m_deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequence.m_key, s);
+                assert(s.m_isValid);
+                sequenceSamples = std::max(sequenceSamples, s.m_numberOfSamples);
+            }
+            sequence.m_numberOfSamples = sequenceSamples;
+            result.push_back(sequence);
+        }
+    }
+
    std::swap(sequences, result);
 }

@ -159,8 +196,8 @@ class Bundler::BundlingChunk : public Chunk
    size_t m_chunkId;

    // A mapping between exposed sequence id and inner chunk for each deserializer.
-    // Index i of the vector maps to the chunk of inner sequence (i / m_numberOfInputs) of
-    // deserializer (i % m_numberOfInputs).
+    // Index i of the vector maps to the chunk of inner sequence (i / number of deserializers) of
+    // deserializer (i % number of deserializers).
    std::vector<ChunkPtr> m_innerChunks;
    // A mapping between exposed sequence id and inner sequence id for each deserializer.
    // Indices as above.
@ -176,15 +213,14 @@ public:
        ChunkDescriptionPtr original = chunk->m_original;

        auto& deserializers = m_parent->m_deserializers;
-        assert(numberOfInputs == deserializers.size());
        std::vector<SequenceDescription> sequences;
        sequences.reserve(original->m_numberOfSequences);

        // Creating chunk mapping.
        m_parent->m_driver->GetSequencesForChunk(original->m_id, sequences);
        ChunkPtr drivingChunk = m_parent->m_driver->GetChunk(original->m_id);
-        m_sequenceToSequence.resize(m_numberOfInputs * sequences.size());
-        m_innerChunks.resize(m_numberOfInputs * sequences.size());
+        m_sequenceToSequence.resize(deserializers.size() * sequences.size());
+        m_innerChunks.resize(deserializers.size() * sequences.size());
        for (size_t sequenceIndex = 0; sequenceIndex < sequences.size(); ++sequenceIndex)
        {
            if (chunk->m_invalid.find(sequenceIndex) != chunk->m_invalid.end())
@ -192,7 +228,7 @@ public:
                continue;
            }

-            size_t currentIndex = sequenceIndex * m_numberOfInputs;
+            size_t currentIndex = sequenceIndex * deserializers.size();
            m_sequenceToSequence[currentIndex] = sequences[sequenceIndex].m_id;
            m_innerChunks[currentIndex] = drivingChunk;
        }
@ -210,7 +246,7 @@ public:
                    continue;
                }

-                size_t currentIndex = sequenceIndex * m_numberOfInputs + deserializerIndex;
+                size_t currentIndex = sequenceIndex * deserializers.size() + deserializerIndex;
                deserializers[deserializerIndex]->GetSequenceDescriptionByKey(sequences[sequenceIndex].m_key, s);
                m_sequenceToSequence[currentIndex] = s.m_id;

@ -235,7 +271,7 @@ public:
    virtual void GetSequence(size_t sequenceId, std::vector<SequenceDataPtr>& result) override
    {
        result.reserve(m_numberOfInputs);
-        size_t currentIndex = sequenceId * m_numberOfInputs;
+        size_t currentIndex = sequenceId * m_parent->m_deserializers.size();
        for (int i = 0; i < m_parent->m_deserializers.size(); ++i)
        {
            size_t originalSequenceId = m_sequenceToSequence[currentIndex + i];
--- a/Source/Readers/ReaderLib/Bundler.h
+++ b/Source/Readers/ReaderLib/Bundler.h
@ -53,6 +53,11 @@ private:
    // correct number of samples/sequences they contain.
    // If this flag is set to false, no cleaning will be done, so additional overhead.
    bool m_cleanse;
+
+    // If this flag is set to true, the sequence length is taken from the primary deserializer only.
+    // Used as an optimization when sequences of different deserializers have the same length
+    // (e.g. often the case in speech).
+    bool m_takePrimarySequenceLength;
 };

 }}}
--- a/Source/Readers/ReaderLib/ChunkCache.cpp
+++ b/Source/Readers/ReaderLib/ChunkCache.cpp
@ -0,0 +1,26 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#define _CRT_SECURE_NO_WARNINGS
+
+#include "ChunkCache.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Returns the chunk with the given id. The first request for an id is forwarded to the
+// wrapped deserializer and the result is remembered; later requests are served from the map.
+ChunkPtr ChunkCache::GetChunk(size_t chunkId)
+{
+    auto cached = m_chunkMap.find(chunkId);
+    if (cached == m_chunkMap.end())
+    {
+        // Not seen yet: the chunk is fetched before the map is touched, so nothing is
+        // stored if the wrapped deserializer fails.
+        cached = m_chunkMap.emplace(chunkId, m_deserializer->GetChunk(chunkId)).first;
+    }
+
+    return cached->second;
+}
+
+} } }
--- a/Source/Readers/ReaderLib/ChunkCache.h
+++ b/Source/Readers/ReaderLib/ChunkCache.h
@ -0,0 +1,56 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <map>
+#include "DataDeserializer.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// A cache to store the complete dataset (all chunks) in memory. The caching can
+// be switched on/off by a boolean flag in the reader config section, independent
+// of the randomization and chunking parameters. The caching should only be enabled
+// when the whole dataset fits in memory.
+// Implemented as a wrapping proxy around a deserializer that stores pointers to
+// all chunks it sees in an internal map.
+class ChunkCache : public IDataDeserializer
+{
+public:
+
+    // Wraps the given deserializer. All calls except GetChunk are forwarded to it unchanged.
+    ChunkCache(IDataDeserializerPtr deserializer) : m_deserializer(deserializer) { }
+
+    // Forwards to the wrapped deserializer.
+    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
+    {
+        return m_deserializer->GetStreamDescriptions();
+    }
+
+    // Forwards to the wrapped deserializer.
+    virtual ChunkDescriptions GetChunkDescriptions() override
+    {
+        return m_deserializer->GetChunkDescriptions();
+    }
+
+    // Forwards to the wrapped deserializer.
+    virtual void GetSequencesForChunk(size_t chunkId, std::vector<SequenceDescription>& descriptions) override
+    {
+        return m_deserializer->GetSequencesForChunk(chunkId, descriptions);
+    }
+
+    // Forwards to the wrapped deserializer.
+    virtual void GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& description) override
+    {
+        return m_deserializer->GetSequenceDescriptionByKey(key, description);
+    }
+
+    // Gets chunk data given its id.
+    // Marked 'override' like the other interface methods so that the compiler verifies
+    // the signature against IDataDeserializer.
+    virtual ChunkPtr GetChunk(size_t chunkId) override;
+
+private:
+    // A map of currently loaded chunks, keyed by chunk id.
+    std::map<size_t, ChunkPtr> m_chunkMap;
+    // The wrapped deserializer all requests are delegated to.
+    IDataDeserializerPtr m_deserializer;
+
+    DISABLE_COPY_AND_MOVE(ChunkCache);
+};
+
+} } }
--- a/Source/Readers/ReaderLib/ConfigUtil.h
+++ b/Source/Readers/ReaderLib/ConfigUtil.h
@ -0,0 +1,40 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "Config.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// Helper function that collects the names of all sections of the config for which
+// ExistsCurrent(parameterName) holds. Returns an empty vector if there is no such section.
+inline std::vector<std::string> TryGetSectionsWithParameter(const ConfigParameters& config, const std::string& parameterName)
+{
+    std::vector<std::string> matchingSections;
+    for (const std::pair<std::string, ConfigParameters>& entry : config)
+    {
+        if (!entry.second.ExistsCurrent(parameterName))
+        {
+            continue;
+        }
+
+        matchingSections.push_back(entry.first);
+    }
+
+    return matchingSections;
+}
+
+// Helper function to get the sections that contain the specified parameter.
+// Raises a RuntimeError that names the reader if no section contains the parameter.
+inline std::vector<std::string> GetSectionsWithParameter(const std::string& reader, const ConfigParameters& config, const std::string& parameterName)
+{
+    auto sections = TryGetSectionsWithParameter(config, parameterName);
+    if (sections.size() == 0)
+    {
+        RuntimeError("%s requires %s parameter.", reader.c_str(), parameterName.c_str());
+    }
+
+    return sections;
+}
+
+}}}
--- a/Source/Readers/ReaderLib/FramePacker.h
+++ b/Source/Readers/ReaderLib/FramePacker.h
@ -15,12 +15,10 @@ class FramePacker : public SequencePacker
 public:
    FramePacker(
        MemoryProviderPtr memoryProvider,
-        TransformerPtr transformer,
+        SequenceEnumeratorPtr sequenceEnumerator,
        const std::vector<StreamDescriptionPtr>& streams) :
-        SequencePacker(memoryProvider, transformer, streams)
-    {
-
-    }
+        SequencePacker(memoryProvider, sequenceEnumerator, streams)
+    {}

 private:

--- a/Source/Readers/ReaderLib/NoRandomizer.cpp
+++ b/Source/Readers/ReaderLib/NoRandomizer.cpp
@ -42,10 +42,6 @@ NoRandomizer::NoRandomizer(IDataDeserializerPtr deserializer, bool multithreaded
    m_totalNumberOfSamples = sampleCount;
 }

-void NoRandomizer::Initialize(TransformerPtr, const ConfigParameters&)
-{
-}
-
 size_t NoRandomizer::GetChunkIndexOf(size_t samplePosition)
 {
    auto result = std::upper_bound(m_chunkSampleOffset.begin(), m_chunkSampleOffset.end(), samplePosition);
--- a/Source/Readers/ReaderLib/NoRandomizer.h
+++ b/Source/Readers/ReaderLib/NoRandomizer.h
@ -6,10 +6,8 @@
 #pragma once

 #include <vector>
-#include <map>
-#include "Transformer.h"
+#include "SequenceEnumerator.h"
 #include "DataDeserializer.h"
-#include "SequenceRandomizer.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -17,13 +15,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 // Used training where the training data has already been pre - randomized.
 // TODO: currently this code moved from the old block randomizer.
 // TODO: The class will be further refactored and common based will be extracted with BlockRandomizer.
-// TODO: This layering will be changed, when we move transformers under the randomizer, it won't be a transformer anymore.
-class NoRandomizer : public Transformer
+class NoRandomizer : public SequenceEnumerator
 {
 public:
    NoRandomizer(IDataDeserializerPtr deserializer, bool multithreadedGetNextSequences = false);

-    virtual void Initialize(TransformerPtr next, const ConfigParameters& readerConfig) override;
    virtual void StartEpoch(const EpochConfiguration& config) override;
    virtual Sequences GetNextSequences(size_t sampleCount) override;
    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
--- a/Source/Readers/ReaderLib/PackerBase.cpp
+++ b/Source/Readers/ReaderLib/PackerBase.cpp
@ -34,13 +34,13 @@ void PackerBase::StartEpoch(const EpochConfiguration& config)
 }

 PackerBase::PackerBase(MemoryProviderPtr memoryProvider,
-    TransformerPtr transformer,
+    SequenceEnumeratorPtr sequenceEnumerator,
    const std::vector<StreamDescriptionPtr>& streams) :
-    m_transformer(transformer),
+    m_sequenceEnumerator(sequenceEnumerator),
    m_minibatchSize(0),
    m_outputStreamDescriptions(streams)
 {
-    m_inputStreamDescriptions = m_transformer->GetStreamDescriptions();
+    m_inputStreamDescriptions = sequenceEnumerator->GetStreamDescriptions();
    assert(m_inputStreamDescriptions.size() != 0);
    assert(m_inputStreamDescriptions.size() == m_outputStreamDescriptions.size());

--- a/Source/Readers/ReaderLib/PackerBase.h
+++ b/Source/Readers/ReaderLib/PackerBase.h
@ -7,9 +7,8 @@

 #include "Reader.h"
 #include "MemoryProvider.h"
-#include "Transformer.h"
+#include "SequenceEnumerator.h"
 #include "Packer.h"
-#include <deque>

 namespace Microsoft { namespace MSR { namespace CNTK {

@ -34,7 +33,7 @@ protected:
    };

    PackerBase(MemoryProviderPtr memoryProvider,
-               TransformerPtr transformer,
+               SequenceEnumeratorPtr sequenceEnumerator,
               const std::vector<StreamDescriptionPtr>& streams);

    typedef std::vector<SequenceDataPtr> StreamBatch;
@ -57,7 +56,7 @@ protected:
    // (sampleOffset is equal to the sum of sample sizes of all preceding samples).
    void PackDenseSample(char* destination, SequenceDataPtr sequence, size_t sampleOffset, size_t sampleSize);

-    TransformerPtr m_transformer;
+    SequenceEnumeratorPtr m_sequenceEnumerator;

    // Input stream descriptions provided by the transformer.
    std::vector<StreamDescriptionPtr> m_outputStreamDescriptions;
--- a/Source/Readers/ReaderLib/ReaderLib.vcxproj
+++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj
@ -40,17 +40,20 @@
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemGroup>
+    <ClInclude Include="ConfigUtil.h" />
    <ClInclude Include="CorpusDescriptor.h" />
    <ClInclude Include="Bundler.h" />
+    <ClInclude Include="ChunkCache.h" />
    <ClInclude Include="ChunkRandomizer.h" />
+    <ClInclude Include="TransformController.h" />
    <ClInclude Include="DataDeserializerBase.h" />
    <ClInclude Include="BlockRandomizer.h" />
    <ClInclude Include="Packer.h" />
    <ClInclude Include="PackerBase.h" />
+    <ClInclude Include="SequenceEnumerator.h" />
    <ClInclude Include="SequencePacker.h" />
    <ClInclude Include="SequenceRandomizer.h" />
    <ClInclude Include="StringToIdMap.h" />
-    <ClInclude Include="TransformerBase.h" />
    <ClInclude Include="NoRandomizer.h" />
    <ClInclude Include="CudaMemoryProvider.h" />
    <ClInclude Include="DataDeserializer.h" />
@ -65,6 +68,7 @@
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="Bundler.cpp" />
+    <ClCompile Include="ChunkCache.cpp" />
    <ClCompile Include="ChunkRandomizer.cpp" />
    <ClCompile Include="NoRandomizer.cpp" />
    <ClCompile Include="BlockRandomizer.cpp" />
@ -78,4 +82,4 @@
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
-</Project>
+</Project>
--- a/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters
+++ b/Source/Readers/ReaderLib/ReaderLib.vcxproj.filters
@ -28,9 +28,6 @@
    <ClInclude Include="ElementTypeUtils.h">
      <Filter>Utils</Filter>
    </ClInclude>
-    <ClInclude Include="TransformerBase.h">
-      <Filter>Transformers</Filter>
-    </ClInclude>
    <ClInclude Include="DataDeserializerBase.h">
      <Filter>Deserializers</Filter>
    </ClInclude>
@ -64,9 +61,21 @@
    <ClInclude Include="TruncatedBpttPacker.h">
      <Filter>Packers</Filter>
    </ClInclude>
-    <ClInclude Include="..\ExperimentalHTKMLFReader\CorpusDescriptor.h">
+    <ClInclude Include="SequenceEnumerator.h">
+      <Filter>Interfaces</Filter>
+    </ClInclude>
+    <ClInclude Include="ConfigUtil.h">
      <Filter>Utils</Filter>
    </ClInclude>
+    <ClInclude Include="ChunkCache.h">
+      <Filter>Utils</Filter>
+    </ClInclude>
+    <ClInclude Include="CorpusDescriptor.h">
+      <Filter>Interfaces</Filter>
+    </ClInclude>
+    <ClInclude Include="TransformController.h">
+      <Filter>Transformers</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="NoRandomizer.cpp">
@ -99,6 +108,9 @@
    <ClCompile Include="TruncatedBpttPacker.cpp">
      <Filter>Packers</Filter>
    </ClCompile>
+    <ClCompile Include="ChunkCache.cpp">
+      <Filter>Utils</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <Filter Include="Interfaces">
--- a/Source/Readers/ReaderLib/ReaderShim.h
+++ b/Source/Readers/ReaderLib/ReaderShim.h
@ -33,6 +33,13 @@ public:

    virtual void Destroy() override
    {
+        // Make sure there are no outstanding reads.
+        if (m_prefetchTask.valid())
+        {
+            // If there are some, give them up to 5 seconds to finish; the result of the wait is not checked.
+            m_prefetchTask.wait_for(std::chrono::seconds(5));
+        }
+
        delete this;
    }

--- a/Source/Readers/ReaderLib/SequenceEnumerator.h
+++ b/Source/Readers/ReaderLib/SequenceEnumerator.h
@ -0,0 +1,52 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <vector>
+#include "DataDeserializer.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+class ConfigParameters;
+
+// Defines a set of sequences for a set of streams.
+// Returned by the sequence enumerator.
+struct Sequences
+{
+    // Data for up to a requested number of sequences.
+    // Indices in the outer vector have to correspond to the stream ids returned from GetStreamDescriptions();
+    // the inner vector holds the sequences of that stream.
+    std::vector<std::vector<SequenceDataPtr>> m_data;
+
+    // Indicates whether the epoch ends with the data returned. Defaults to false.
+    bool m_endOfEpoch = false;
+};
+
+class SequenceEnumerator;
+typedef std::shared_ptr<SequenceEnumerator> SequenceEnumeratorPtr;
+
+// Sequence enumerator is an internal interface used by the packer to get a set of new sequences.
+// It is implemented either by the different randomizers or by the TransformController, which can
+// wrap a randomizer and apply different transforms on top of the data.
+//
+// This interface is internal to CNTK; it is not exposed to the developers of deserializers/plugins.
+class SequenceEnumerator
+{
+public:
+    // Returns the descriptions of the streams this enumerator produces.
+    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const = 0;
+
+    // Applies the configuration of the epoch that is about to start.
+    virtual void StartEpoch(const EpochConfiguration& config) = 0;
+
+    // Returns the next sequences, up to a maximum of sampleCount samples.
+    virtual Sequences GetNextSequences(size_t sampleCount) = 0;
+
+    virtual ~SequenceEnumerator() {}
+};
+
+}}}
--- a/Source/Readers/ReaderLib/SequencePacker.cpp
+++ b/Source/Readers/ReaderLib/SequencePacker.cpp
@ -38,7 +38,7 @@ MBLayoutPtr SequencePacker::CreateMBLayout(const StreamBatch& batch)

 Minibatch SequencePacker::ReadMinibatch()
 {
-    auto sequences = m_transformer->GetNextSequences(m_minibatchSize);
+    auto sequences = m_sequenceEnumerator->GetNextSequences(m_minibatchSize);
    const auto& batch = sequences.m_data;

    Minibatch minibatch(sequences.m_endOfEpoch);
--- a/Source/Readers/ReaderLib/SequencePacker.h
+++ b/Source/Readers/ReaderLib/SequencePacker.h
@ -16,9 +16,9 @@ class SequencePacker : public PackerBase
 public:
    SequencePacker(
        MemoryProviderPtr memoryProvider,
-        TransformerPtr transformer,
+        SequenceEnumeratorPtr sequenceEnumerator,
        const std::vector<StreamDescriptionPtr>& streams) :
-        PackerBase(memoryProvider, transformer, streams)
+        PackerBase(memoryProvider, sequenceEnumerator, streams)
    {

    }
--- a/Source/Readers/ReaderLib/StringToIdMap.h
+++ b/Source/Readers/ReaderLib/StringToIdMap.h
@ -33,7 +33,7 @@ public:
    }

    // Tries to get a value by id.
-    bool TryGet(const TString& value, size_t& id)
+    bool TryGet(const TString& value, size_t& id) const
    {
        const auto& it = m_values.find(value);
        if (it == m_values.end())
--- a/Source/Readers/ReaderLib/TransformController.h
+++ b/Source/Readers/ReaderLib/TransformController.h
@ -0,0 +1,105 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#pragma once
+
+#include <set>
+
+#include "Transformer.h"
+#include "SequenceEnumerator.h"
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// A pair of a transformer and the name of the stream to which the transformer should be applied.
+struct Transformation
+{
+    // The transformer to apply.
+    TransformerPtr m_transformer;
+    // Name of the stream the transformer is applied to.
+    std::wstring m_streamName;
+};
+
+// A class responsible for applying a list of transformers to sequences and stream descriptions.
+// Delegates retrieving of sequences to another sequence provider (such as a randomizer) and applies
+// the transformations after retrieving.
+// Usually used by the packer to get the next set of sequences.
+class TransformController : public SequenceEnumerator
+{
+public:
+    // Binds each transformation to the id of the stream it names and computes the output stream
+    // descriptions by passing the provider's descriptions through the transformers in the given order.
+    // A stream name that is not among the provider's streams results in a LogicError.
+    TransformController(const std::vector<Transformation>& transformations, SequenceEnumeratorPtr sequenceProvider)
+        : m_sequenceProvider(sequenceProvider)
+    {
+        // Applying transformations to stream descriptions,
+        // i.e. a transformation can change a stream from dense to sparse.
+        std::vector<StreamDescriptionPtr> transformedStreams = m_sequenceProvider->GetStreamDescriptions();
+        for (auto& t : transformations)
+        {
+            size_t streamId = GetStreamId(t.m_streamName, transformedStreams);
+            m_transformations.push_back(std::make_pair(t, streamId));
+            transformedStreams[streamId] = std::make_shared<StreamDescription>(t.m_transformer->Transform(*transformedStreams[streamId]));
+        }
+        m_outputStreams = transformedStreams;
+    }
+
+    // Sets configuration for the current epoch.
+    // Some transformers can change their config based on the epoch.
+    // The transformers are notified first, then the wrapped sequence provider.
+    virtual void StartEpoch(const EpochConfiguration &config) override
+    {
+        assert(m_sequenceProvider != nullptr);
+        for (auto& t : m_transformations)
+        {
+            t.first.m_transformer->StartEpoch(config);
+        }
+
+        m_sequenceProvider->StartEpoch(config);
+    }
+
+    // Description of the streams after all transformations have been applied.
+    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
+    {
+        return m_outputStreams;
+    }
+
+    // Gets next sequences up to a maximum count of samples,
+    // applying transformers to particular streams.
+    virtual Sequences GetNextSequences(size_t sampleCount) override
+    {
+        assert(m_sequenceProvider != nullptr);
+        Sequences sequences = m_sequenceProvider->GetNextSequences(sampleCount);
+        if (sequences.m_data.empty())
+        {
+            return sequences;
+        }
+
+        // The loop index stays a signed int; the size is converted once so that the loop
+        // condition does not mix signed and unsigned values.
+        const int sequenceCount = static_cast<int>(sequences.m_data.front().size());
+
+        // Sequences are processed in parallel; for a single sequence the transformations
+        // are applied one after another in the order they were registered.
+#pragma omp parallel for schedule(dynamic)
+        for (int j = 0; j < sequenceCount; ++j)
+        {
+            for (auto& t : m_transformations)
+            {
+                sequences.m_data[t.second][j] = t.first.m_transformer->Transform(sequences.m_data[t.second][j]);
+            }
+        }
+
+        return sequences;
+    }
+
+private:
+    // Returns the id of the stream with the given name.
+    // Raises a LogicError if no stream has that name.
+    // The name is taken by const reference to avoid copying the string on every call.
+    size_t GetStreamId(const std::wstring& streamName, const std::vector<StreamDescriptionPtr>& streams) const
+    {
+        for (const auto& s : streams)
+        {
+            if (s->m_name == streamName)
+            {
+                return s->m_id;
+            }
+        }
+
+        assert(false);
+        LogicError("Unexpected stream specified for transformation.");
+    }
+
+    // Provider the sequences are retrieved from before the transformations are applied.
+    SequenceEnumeratorPtr m_sequenceProvider;
+    // Stream descriptions after applying all transformations.
+    std::vector<StreamDescriptionPtr> m_outputStreams;
+    // Transformations together with the id of the stream each one is applied to.
+    std::vector<std::pair<Transformation, size_t>> m_transformations;
+};
+
+}}}
--- a/Source/Readers/ReaderLib/Transformer.h
+++ b/Source/Readers/ReaderLib/Transformer.h
@ -5,57 +5,33 @@

 #pragma once

-#include <vector>
 #include "DataDeserializer.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

-class ConfigParameters;
-
-// Defines a set of sequences.
-struct Sequences
-{
-    Sequences()
-        : m_endOfEpoch(false)
-    {
-    }
-
-    // Data for up to a requested number of sequences.
-    // Indices in the inner vector have to correspond to the stream IDs
-    // given by GetStream().
-    std::vector<std::vector<SequenceDataPtr>> m_data;
-
-    // Indicates whether the epoch ends with the data returned.
-    bool m_endOfEpoch;
-};
-
 class Transformer;
 typedef std::shared_ptr<Transformer> TransformerPtr;

 // Defines a data transformation interface.
 // Transformers are responsible for doing custom transformation of sequences.
 // For example for images, there could be scale, crop, or median transformation.
-// TODO: Adopt to the C#/Java iterator pattern.
 class Transformer
 {
 public:
-    // Initialization.
-    virtual void Initialize(
-        TransformerPtr next,
-        const ConfigParameters& readerConfig) = 0;
+    // Starts a new epoch. Some transformers have to change their configuration
+    // based on the epoch.
+    virtual void StartEpoch(const EpochConfiguration &config) = 0;

-    // Describes streams the transformer produces.
-    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const = 0;
+    // Transformers are applied on a particular input stream - this method should describe
+    // how inputStream is transformed to the output stream (return value)
+    virtual StreamDescription Transform(const StreamDescription& inputStream) = 0;

-    // Sets current epoch configuration.
-    virtual void StartEpoch(const EpochConfiguration& config) = 0;
-
-    // Gets next sequences up to a maximum count of samples.
-    // The return value can be used until the next call to GetNextSequences.
-    virtual Sequences GetNextSequences(size_t sampleCount) = 0;
+    // This method should describe how an input sequence is transformed to the output sequence.
+    virtual SequenceDataPtr Transform(SequenceDataPtr inputSequence) = 0;

    virtual ~Transformer()
    {
    }
 };
-} } }
+
+}}}
--- a/Source/Readers/ReaderLib/TransformerBase.h
+++ b/Source/Readers/ReaderLib/TransformerBase.h
@ -1,91 +0,0 @@
-//
-// Copyright (c) Microsoft. All rights reserved.
-// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
-//
-
-#pragma once
-
-#include <set>
-
-#include "Transformer.h"
-
-namespace Microsoft { namespace MSR { namespace CNTK {
-
-class TransformerBase : public Transformer
-{
-public:
-    // Initializes the transformer.
-    virtual void Initialize(TransformerPtr next,
-                            const ConfigParameters &) override
-    {
-        m_next = next;
-        m_inputStreams = m_next->GetStreamDescriptions();
-    }
-
-    // Sets configuration for the current epoch.
-    virtual void StartEpoch(const EpochConfiguration &config) override
-    {
-        assert(m_next != nullptr);
-        m_next->StartEpoch(config);
-    }
-
-    // Description of streams that the transformer provides.
-    virtual std::vector<StreamDescriptionPtr> GetStreamDescriptions() const override
-    {
-        return this->GetOutputStreams();
-    }
-
-    // Gets next sequences up to a maximum count of samples.
-    // Sequences contains data for all streams.
-    virtual Sequences GetNextSequences(size_t sampleCount) override
-    {
-        assert(m_next != nullptr);
-        Sequences samples = m_next->GetNextSequences(sampleCount);
-
-        if (samples.m_data.empty())
-        {
-            return samples;
-        }
-
-        const auto &appliedStreamIds = GetAppliedStreamIds();
-        const auto &outputStreams = GetOutputStreams();
-
-        // TODO: Move parallelization on the outer loop with collapse.
-        for (int j = 0; j < appliedStreamIds.size(); ++j)
-        {
-            size_t streamId = appliedStreamIds[j];
-            auto& allSamples = samples.m_data[streamId];
-
-#pragma omp parallel for schedule(dynamic)
-            for (int i = 0; i < allSamples.size(); ++i)
-            {
-                allSamples[i] = Apply(allSamples[i], *m_inputStreams[streamId], *outputStreams[streamId]);
-            }
-        }
-        return samples;
-    }
-
-protected:
-    virtual const std::vector<StreamId> &GetAppliedStreamIds() const = 0;
-    virtual const std::vector<StreamDescriptionPtr> &GetOutputStreams() const
-    {
-        return m_inputStreams;
-    }
-
-    const std::vector<StreamDescriptionPtr> &GetInputStreams()
-    {
-        return m_inputStreams;
-    }
-
-private:
-    // Applies transformation to the sequence.
-    virtual SequenceDataPtr Apply(SequenceDataPtr inputSequence,
-                                  const StreamDescription &inputStream,
-                                  const StreamDescription &outputStream) = 0;
-
-    TransformerPtr m_next;
-    std::vector<StreamId> m_featureStreamIds;
-    std::vector<StreamDescriptionPtr> m_inputStreams;
-};
-
-}}}
--- a/Source/Readers/ReaderLib/TruncatedBpttPacker.cpp
+++ b/Source/Readers/ReaderLib/TruncatedBpttPacker.cpp
@ -107,9 +107,9 @@ struct SequenceBuffer

 TruncatedBPTTPacker::TruncatedBPTTPacker(
    MemoryProviderPtr memoryProvider,
-    TransformerPtr transformer,
+    SequenceEnumeratorPtr sequenceEnumerator,
    const vector<StreamDescriptionPtr>& streams)
-    : PackerBase(memoryProvider, transformer, streams),
+    : PackerBase(memoryProvider, sequenceEnumerator, streams),
    m_truncationSize(0)
 {
    auto sparseOutput = find_if(m_outputStreamDescriptions.begin(), m_outputStreamDescriptions.end(), [](const StreamDescriptionPtr& s){ return s->m_storageType == StorageType::sparse_csc; });
@ -148,6 +148,15 @@ void TruncatedBPTTPacker::StartEpoch(const EpochConfiguration& config)
        // Estimating the number of parallel sequences to pack (slots) from the minibatch size and truncation size.
        m_numParallelSequences = max(1, (int)floor(m_minibatchSize / m_truncationSize));

+        if (config.m_numberOfWorkers > m_numParallelSequences)
+        {
+            InvalidArgument("Too many workers for minibatch size; please increase minibatch size or decrease number of workers.");
+        }
+
+        m_numParallelSequences =
+            (m_numParallelSequences / config.m_numberOfWorkers) +
+            (config.m_workerRank < (m_numParallelSequences % config.m_numberOfWorkers) ? 1 : 0);
+
        m_sequenceBufferPerStream.clear();

        // Preparing the buffers.
@ -303,7 +312,7 @@ void TruncatedBPTTPacker::ReadSequencesToSlot(size_t slotIndex)
    {
        // We need a single sequence, potentially we can request (m_truncationSize - slot.AvailableNumberOfSamples())
        // to be more efficient. In reality the truncation size usually is less the sequence size.
-        auto s = m_transformer->GetNextSequences(1);
+        auto s = m_sequenceEnumerator->GetNextSequences(1);
        if (s.m_endOfEpoch)
        {
            break;
--- a/Source/Readers/ReaderLib/TruncatedBpttPacker.h
+++ b/Source/Readers/ReaderLib/TruncatedBpttPacker.h
@ -7,7 +7,6 @@

 #include "Reader.h"
 #include "MemoryProvider.h"
-#include "Transformer.h"
 #include "PackerBase.h"

 namespace Microsoft { namespace MSR { namespace CNTK {
@ -23,7 +22,7 @@ class TruncatedBPTTPacker : public PackerBase
 public:
    TruncatedBPTTPacker(
        MemoryProviderPtr memoryProvider,
-        TransformerPtr transformer,
+        SequenceEnumeratorPtr sequenceEnumerator,
        const std::vector<StreamDescriptionPtr>& streams);

    virtual Minibatch ReadMinibatch() override;
--- a/Source/SGDLib/DataReaderHelpers.h
+++ b/Source/SGDLib/DataReaderHelpers.h
@ -15,12 +15,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {

 /*static*/ struct DataReaderHelpers
 {
+    // Notifies every feature and label node of the network whose value matrix is one of the
+    // given input matrices that the minibatch size of its function values was modified.
+    template <class ElemType>
+    static void NotifyChangedNodes(ComputationNetworkPtr net, StreamMinibatchInputs& inputMatrices)
+    {
+        // reader will have resized input node's m_value directly. Nodes must be notified to do necessary internal state updates from that.
+        // TODO: This is a stopgap. SGD will at some point change from sets of matrices to sets of nodes. Then this will become much simpler.
+        std::set<MatrixBasePtr> changedMatrices;
+        for (const auto& input : inputMatrices)
+        {
+            changedMatrices.insert(input.second.matrix);
+        }
+
+        for (auto& featureNode : net->FeatureNodes())
+        {
+            if (changedMatrices.count(featureNode->As<ComputationNode<ElemType>>()->ValuePtr()) != 0)
+            {
+                featureNode->NotifyFunctionValuesMBSizeModified();
+            }
+        }
+
+        for (auto& labelNode : net->LabelNodes())
+        {
+            if (changedMatrices.count(labelNode->As<ComputationNode<ElemType>>()->ValuePtr()) != 0)
+            {
+                labelNode->NotifyFunctionValuesMBSizeModified();
+            }
+        }
+    }
+
    // -------------------------------------------------------------------
    // GetMinibatchIntoNetwork() -- get one minibatch from Reader (this->trainSetDataReader) into Network (this->net)
    // Returns false if no data is read. In that case, no other return value can be expected to contain meaningful values (e.g. actualMBSize will be unchanged).
    // Sets actualMBSize to the number of matrix columns. Note that 0 is a valid value to be returned for actualMBSize, caller must handle that correctly.
    // -------------------------------------------------------------------
-
    // Note: This will go away with the redesigned reader interface.
    // TODO: callers of this often do ComputationNetwork::BumpEvalTimeStamp(featureNodes) and also for labels; we should eliminate the need for this.
    template <class ElemType>
@ -78,17 +93,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            DecimateMinibatchInPlace<ElemType>(inputMatrices, mpi->NumNodesInUse(), mpi->CurrentNodeRank(), pMBLayout);
        }

-        // reader will have resized input node's m_value directly. Nodes must be notified to do necessary internal state updates from that.
-        // TODO: This is a stopgap. SGD will at some point change from sets of matrices to sets of nodes. Then this will become much simpler.
-        std::set<MatrixBasePtr> matrices;
-        for (const auto& iter : inputMatrices)
-            matrices.insert(iter.second.matrix);
-        for (auto& node : net->FeatureNodes())
-            if (matrices.find(node->As<ComputationNode<ElemType>>()->ValuePtr()) != matrices.end())
-                node->NotifyFunctionValuesMBSizeModified();
-        for (auto& node : net->LabelNodes())
-            if (matrices.find(node->As<ComputationNode<ElemType>>()->ValuePtr()) != matrices.end())
-                node->NotifyFunctionValuesMBSizeModified();
+        NotifyChangedNodes<ElemType>(net, inputMatrices);

        // get MB size and tell Network to update its nodes' buffers based on what's in the input matrices
        // Note: Decimation may have reduced this to 0 frames. We still must return 'true'.
@ -99,6 +104,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        return true;
    }

+    // Build the StreamMinibatchInputs for a set of input nodes: one entry per node (node name, value matrix, MB layout, sample layout)
+    static StreamMinibatchInputs RetrieveInputMatrices(const std::vector<ComputationNodeBasePtr>& inputNodes)
+    {
+        StreamMinibatchInputs inputMatrices;
+        for (auto& node : inputNodes)
+            inputMatrices.AddInput(node->NodeName(), node->ValuePtr(), node->GetMBLayout(), node->GetSampleLayout());
+        return inputMatrices;
+    }
+
+
    // -------------------------------------------------------------------
    // DecimateMinibatch - decimate minibatch for parallelization
    // -------------------------------------------------------------------
--- a/Source/SGDLib/SGD.cpp
+++ b/Source/SGDLib/SGD.cpp
@ -365,7 +365,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,

        // set dropout rate for this epoch
        // We use the same seed across workers until parallel training kicks in to ensure that the workers have identical models
-        size_t parallelWorkerIdx = ((m_mpi == nullptr) || !UseParallelTrain(i)) ? 0 : m_mpi->CurrentNodeRank();
+        size_t parallelWorkerIdx = ((m_mpi == nullptr) || !UsingParallelTrain(i)) ? 0 : m_mpi->CurrentNodeRank();
        size_t dropoutRandSeedBase = (parallelWorkerIdx * m_maxEpochs) + i;
        ComputationNetwork::SetDropoutRate<ElemType>(net, criterionNodes[0], m_dropoutRates[i], prevDropoutRate, dropoutRandSeedBase);
        ComputationNetwork::SetBatchNormalizationTimeConstants<ElemType>(net, criterionNodes[0], 
@ -771,9 +771,9 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,

    int numMBsRun = 0;

-    bool useGradientAggregation = UseGradientAggregation(epochNumber);
-    bool useModelAggregation = UseModelAggregation(epochNumber);
-    bool useParallelTrain = UseParallelTrain(epochNumber);
+    bool useGradientAggregation = UsingGradientAggregation(epochNumber);
+    bool useModelAggregation = UsingModelAggregation(epochNumber);
+    bool useParallelTrain = UsingParallelTrain(epochNumber);

    // MA-related variables
    size_t nSamplesSinceLastModelSync = 0;
@ -1024,7 +1024,7 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
            for (size_t i = 0; i < evaluationNodes.size(); i++)
                m_gradHeader->evalErrors[i] = localEpochEvalErrors.GetCriterion(i);

-            bool samplesProcessed = m_distGradAgg->AggregateGradients(learnParamsGradients, m_gradHeader, epochNumber);
+            bool samplesProcessed = m_distGradAgg->AggregateGradients(learnParamsGradients, m_gradHeader.get(), epochNumber);
            noMoreSamplesToProcess = !samplesProcessed;

            aggregateNumSamples          = m_gradHeader->numSamples;
@ -1796,20 +1796,22 @@ void SGD<ElemType>::InitDistGradAgg(int numEvalNodes, int traceLevel)
        if (m_distGradAgg == nullptr)
        {
 #ifdef CNTK_PARALLEL_TRAINING_SUPPORT
-            m_distGradAgg = new AllReduceDistGradAggregator<ElemType>(m_mpi, m_numGradientBits, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace);
+            m_distGradAgg = std::make_shared<AllReduceDistGradAggregator<ElemType>>(m_mpi, m_numGradientBits, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace);
 #else
            if (m_numGradientBits != (8 * sizeof(ElemType)))
            {
                RuntimeError("Gradient quantization is unsupported in CNTK binaries built without quantized gradient aggregation support!");
            }

-            m_distGradAgg = new SimpleDistGradAggregator<ElemType>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace);
+            m_distGradAgg = std::make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace);
 #endif // !CNTK_PARALLEL_TRAINING_SUPPORT
        }

        if (m_gradHeader == nullptr)
        {
-            m_gradHeader = DistGradHeader::Create(numEvalNodes);
+            m_gradHeader.reset(DistGradHeader::Create(numEvalNodes), [](DistGradHeader* ptr) {
+                DistGradHeader::Destroy(ptr);
+            });
        }
    }
 }
--- a/Source/SGDLib/SGD.h
+++ b/Source/SGDLib/SGD.h
@ -557,8 +557,8 @@ protected:
    size_t m_prevChosenMinibatchSize;
    double m_lastFinishedEpochTrainLoss;

-    IDistGradAggregator<ElemType>* m_distGradAgg;
-    struct DistGradHeader* m_gradHeader;
+    std::shared_ptr<IDistGradAggregator<ElemType>> m_distGradAgg;
+    std::shared_ptr<struct DistGradHeader> m_gradHeader;

    shared_ptr<IMASGD<ElemType>> m_pMASGDHelper;

@ -566,21 +566,21 @@ private:
    void InitializeAndCheckBlockMomentumSGDParameters();
    void MarkDropoutNodesEvalTimeStampAsOutdated(const ComputationNetworkPtr& net, const ComputationNodeBasePtr& criterionNode);

-    bool UseGradientAggregation(size_t epochNumber)
+    bool UsingGradientAggregation(size_t epochNumber) const
    {
        return ((GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD) && (epochNumber >= m_parallelizationStartEpochNum));
    }

-    bool UseModelAggregation(size_t epochNumber)
+    bool UsingModelAggregation(size_t epochNumber) const
    {
        return ((GetParallelizationMethod() == ParallelizationMethod::modelAveragingSGD ||
                 GetParallelizationMethod() == ParallelizationMethod::blockMomentumSGD) &&
                (epochNumber >= m_parallelizationStartEpochNum));
    }

-    bool UseParallelTrain(size_t epochNumber)
+    bool UsingParallelTrain(size_t epochNumber) const
    {
-        return UseGradientAggregation(epochNumber) || UseModelAggregation(epochNumber);
+        return UsingGradientAggregation(epochNumber) || UsingModelAggregation(epochNumber);
    }
 };

--- a/Source/SGDLib/SimpleEvaluator.h
+++ b/Source/SGDLib/SimpleEvaluator.h
@ -177,7 +177,9 @@ public:
            {
                if (m_gradHeader == nullptr)
                {
-                    m_gradHeader = DistGradHeader::Create(evalNodes.size());
+                    m_gradHeader.reset(DistGradHeader::Create(evalNodes.size()), [](DistGradHeader* ptr) {
+                        DistGradHeader::Destroy(ptr);
+                    });
                    m_distGradAgg = make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, false, m_traceLevel);
                }

@ -199,7 +201,7 @@ public:

                // Using SimpleDistAggregator for eval results only. At some point we should rename the class to be just
                // IDistAggregator and SimpleDistAggregator.
-                bool samplesProcessed = m_distGradAgg->AggregateGradients(learnParamsGradients, m_gradHeader, 0);
+                bool samplesProcessed = m_distGradAgg->AggregateGradients(learnParamsGradients, m_gradHeader.get(), 0);
                noMoreSamplesToProcess = !samplesProcessed;

                aggregateNumSamplesWithLabel = m_gradHeader->numSamplesWithLabel;
@ -299,8 +301,8 @@ protected:
    MPIWrapperPtr m_mpi;
    bool m_enableDistributedMBReading;

-    shared_ptr<IDistGradAggregator<ElemType>> m_distGradAgg;
-    struct DistGradHeader* m_gradHeader;
+    std::shared_ptr<IDistGradAggregator<ElemType>> m_distGradAgg;
+    std::shared_ptr<struct DistGradHeader> m_gradHeader;
    int m_traceLevel;
    void operator=(const SimpleEvaluator&); // (not assignable)
 };
--- a/Source/SGDLib/SimpleOutputWriter.h
+++ b/Source/SGDLib/SimpleOutputWriter.h
@ -23,63 +23,12 @@ using namespace std;

 namespace Microsoft { namespace MSR { namespace CNTK {

+
 template <class ElemType>
 class SimpleOutputWriter
 {
    typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;

-private:
-    std::vector<ComputationNodeBasePtr> DetermineOutputNodes(const std::vector<std::wstring>& outputNodeNames)
-    {
-        std::vector<ComputationNodeBasePtr> outputNodes;
-
-        if (outputNodeNames.size() == 0)
-        {
-            if (m_verbosity > 0)
-                fprintf(stderr, "OutputNodeNames are not specified, using the default outputnodes.\n");
-            if (m_net->OutputNodes().size() == 0)
-                LogicError("There is no default output node specified in the network.");
-
-            outputNodes = m_net->OutputNodes();
-        }
-        else
-        {
-            for (int i = 0; i < outputNodeNames.size(); i++)
-                outputNodes.push_back(m_net->GetNodeFromName(outputNodeNames[i]));
-        }
-
-        return outputNodes;
-    }
-
-    // collect all input nodes that outputNodes depend on
-    // TODO: This is rather generic, we should move this to a shared place. DataReaderHelpers.h?
-    std::vector<ComputationNodeBasePtr> DetermineInputNodes(const std::vector<ComputationNodeBasePtr>& outputNodes)
-    {
-        // use map to remove duplicated items
-        std::set<ComputationNodeBasePtr> inputNodesMap;
-        for (auto& onode : outputNodes)
-        {
-            for (auto& inode : m_net->InputNodes(onode))
-                inputNodesMap.insert(inode);
-        }
-
-        std::vector<ComputationNodeBasePtr> inputNodes;
-        for (auto& inode : inputNodesMap)
-            inputNodes.push_back(inode);
-
-        return inputNodes;
-    }
-
-    // get StreamMinibatchInputs for a given set of input nodes
-    // TODO: This seems generic, we should have that in a shared place.
-    StreamMinibatchInputs RetrieveInputMatrices(const std::vector<ComputationNodeBasePtr>& inputNodes)
-    {
-        StreamMinibatchInputs inputMatrices;
-        for (auto& node : inputNodes)
-            inputMatrices.AddInput(node->NodeName(), node->ValuePtr(), node->GetMBLayout(), node->GetSampleLayout());
-        return inputMatrices;
-    }
-
 public:
    SimpleOutputWriter(ComputationNetworkPtr net, int verbosity = 0)
        : m_net(net), m_verbosity(verbosity)
@ -90,13 +39,16 @@ public:
    {
        ScopedNetworkOperationMode modeGuard(m_net, NetworkOperationMode::inferring);

-        std::vector<ComputationNodeBasePtr> outputNodes = DetermineOutputNodes(outputNodeNames);
-        std::vector<ComputationNodeBasePtr> inputNodes  = DetermineInputNodes(outputNodes);
+        if (outputNodeNames.size() == 0 && m_verbosity > 0)
+            fprintf(stderr, "OutputNodeNames are not specified, using the default outputnodes.\n");
+
+        std::vector<ComputationNodeBasePtr> outputNodes = m_net->OutputNodesByName(outputNodeNames);
+        std::vector<ComputationNodeBasePtr> inputNodes  = m_net->InputNodesForOutputs(outputNodeNames);

        // allocate memory for forward computation
        m_net->AllocateAllMatrices({}, outputNodes, nullptr);

-        StreamMinibatchInputs inputMatrices = RetrieveInputMatrices(inputNodes);
+        StreamMinibatchInputs inputMatrices = DataReaderHelpers::RetrieveInputMatrices(inputNodes);

        // evaluate with minibatches
        dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples);
@ -148,7 +100,7 @@ public:
    // Perform a single forward pass to obtain the output values from a network
    void WriteOutput(IDataWriter& dataWriter, const std::vector<std::wstring>& outputNodeNames, size_t numOutputSamples = requestDataSize, bool doUnitTest = false)
    {
-        std::vector<ComputationNodeBasePtr> outputNodes = DetermineOutputNodes(outputNodeNames);
+        std::vector<ComputationNodeBasePtr> outputNodes = m_net->OutputNodesByName(outputNodeNames);

        // allocate memory for forward computation
        m_net->AllocateAllMatrices({}, outputNodes, nullptr);
@ -203,8 +155,8 @@ public:
        // In case of unit test, make sure backprop works
        ScopedNetworkOperationMode modeGuard(m_net, nodeUnitTest ? NetworkOperationMode::training : NetworkOperationMode::inferring);

-        std::vector<ComputationNodeBasePtr> outputNodes = DetermineOutputNodes(outputNodeNames);
-        std::vector<ComputationNodeBasePtr> inputNodes = DetermineInputNodes(outputNodes);
+        std::vector<ComputationNodeBasePtr> outputNodes = m_net->OutputNodesByName(outputNodeNames);
+        std::vector<ComputationNodeBasePtr> inputNodes = m_net->InputNodesForOutputs(outputNodeNames);
        std::vector<ComputationNodePtr> gradientNodes;
        std::vector<ComputationNodeBasePtr> allOutputNodes = outputNodes;

@ -244,7 +196,7 @@ public:
            m_net->AllocateAllMatrices({}, outputNodes, outputNodes[0]);
        }

-        StreamMinibatchInputs inputMatrices = RetrieveInputMatrices(inputNodes);
+        StreamMinibatchInputs inputMatrices = DataReaderHelpers::RetrieveInputMatrices(inputNodes);
        
        // load a label mapping if requested
        std::vector<std::string> labelMapping;
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/README.txt
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/README.txt
@ -0,0 +1,13 @@
+Test runtimes
+
+Windows:
+Running test CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden (debug cpu) - [OK] 808.62 sec
+Running test CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden (debug gpu) - [OK] 86.82 sec
+Running test CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden (release cpu) - [OK] 49.23 sec
+Running test CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden (release gpu) - [OK] 19.37 sec
+
+Linux:
+Running test CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden (debug cpu) - [OK] 170.07 sec
+Running test CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden (debug gpu) - [OK] 24.82 sec
+Running test CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden (release cpu) - [OK] 20.71 sec
+Running test CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden (release gpu) - [OK] 8.17 sec
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/baseline.linux.txt
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/baseline.linux.txt
@ -0,0 +1,458 @@
+=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/01_OneHidden.cntk currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu DeviceId=0 timestamping=true train=[SGD=[maxEpochs=3]] imageLayout="cudnn"
+-------------------------------------------------------------------
+Build info: 
+
+		Built time: May 13 2016 14:50:25
+		Last modified date: Thu May 12 14:00:37 2016
+		Build type: release
+		Build target: GPU
+		With 1bit-SGD: no
+		Math lib: acml
+		CUDA_PATH: /usr/local/cuda-7.5
+		CUB_PATH: /usr/local/cub-1.4.1
+		CUDNN_PATH: /usr/local/cudnn-4.0
+		Build Branch: HEAD
+		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+		Built by philly on d8dc82703b0f
+		Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
+-------------------------------------------------------------------
+Changed current directory to /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData
+05/13/2016 15:10:02: -------------------------------------------------------------------
+05/13/2016 15:10:02: Build info: 
+
+05/13/2016 15:10:02: 		Built time: May 13 2016 14:50:25
+05/13/2016 15:10:02: 		Last modified date: Thu May 12 14:00:37 2016
+05/13/2016 15:10:02: 		Build type: release
+05/13/2016 15:10:02: 		Build target: GPU
+05/13/2016 15:10:02: 		With 1bit-SGD: no
+05/13/2016 15:10:02: 		Math lib: acml
+05/13/2016 15:10:02: 		CUDA_PATH: /usr/local/cuda-7.5
+05/13/2016 15:10:02: 		CUB_PATH: /usr/local/cub-1.4.1
+05/13/2016 15:10:02: 		CUDNN_PATH: /usr/local/cudnn-4.0
+05/13/2016 15:10:02: 		Build Branch: HEAD
+05/13/2016 15:10:02: 		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+05/13/2016 15:10:02: 		Built by philly on d8dc82703b0f
+05/13/2016 15:10:02: 		Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
+05/13/2016 15:10:02: -------------------------------------------------------------------
+
+05/13/2016 15:10:02: Running on localhost at 2016/05/13 15:10:02
+05/13/2016 15:10:02: Command line: 
+/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk  configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/01_OneHidden.cntk  currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData  RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu  DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData  ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config  OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu  DeviceId=0  timestamping=true  train=[SGD=[maxEpochs=3]]  imageLayout="cudnn"
+
+
+
+05/13/2016 15:10:02: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
+05/13/2016 15:10:02: RootDir = ".."
+ConfigDir = "$RootDir$/Config"
+DataDir   = "$RootDir$/Data"
+OutputDir = "$RootDir$/Output"
+ModelDir  = "$OutputDir$/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "$ModelDir$/01_OneHidden"
+ndlMacros = "$ConfigDir$/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "$ConfigDir$/01_OneHidden.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1
+        momentumPerMB = 0
+        maxEpochs = 30
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]   
+]
+test = [
+    action = "test"
+    minibatchSize = 16
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData
+RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu
+DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData
+ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config
+OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 15:10:02: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED)  <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 15:10:02: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+05/13/2016 15:10:02: RootDir = ".."
+ConfigDir = "../Config"
+DataDir   = "../Data"
+OutputDir = "../Output"
+ModelDir  = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/Models/01_OneHidden"
+ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config/01_OneHidden.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1
+        momentumPerMB = 0
+        maxEpochs = 30
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]   
+]
+test = [
+    action = "test"
+    minibatchSize = 16
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData
+RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu
+DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData
+ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config
+OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 15:10:02: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 15:10:02: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+configparameters: 01_OneHidden.cntk:command=train:test
+configparameters: 01_OneHidden.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config
+configparameters: 01_OneHidden.cntk:currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData
+configparameters: 01_OneHidden.cntk:DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData
+configparameters: 01_OneHidden.cntk:deviceId=0
+configparameters: 01_OneHidden.cntk:imageLayout=cudnn
+configparameters: 01_OneHidden.cntk:initOnCPUOnly=true
+configparameters: 01_OneHidden.cntk:ModelDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/Models
+configparameters: 01_OneHidden.cntk:modelPath=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/Models/01_OneHidden
+configparameters: 01_OneHidden.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config/Macros.ndl
+configparameters: 01_OneHidden.cntk:numMBsToShowResult=500
+configparameters: 01_OneHidden.cntk:OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu
+configparameters: 01_OneHidden.cntk:precision=float
+configparameters: 01_OneHidden.cntk:RootDir=..
+configparameters: 01_OneHidden.cntk:RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu
+configparameters: 01_OneHidden.cntk:test=[
+    action = "test"
+    minibatchSize = 16
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+
+configparameters: 01_OneHidden.cntk:timestamping=true
+configparameters: 01_OneHidden.cntk:traceLevel=1
+configparameters: 01_OneHidden.cntk:train=[
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/../../../../../../../Examples/Image/MNIST/Config/01_OneHidden.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1
+        momentumPerMB = 0
+        maxEpochs = 30
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]   
+] [SGD=[maxEpochs=3]]
+
+05/13/2016 15:10:02: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+05/13/2016 15:10:02: Commands: train test
+05/13/2016 15:10:02: Precision = "float"
+05/13/2016 15:10:02: CNTKModelPath: /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/Models/01_OneHidden
+05/13/2016 15:10:02: CNTKCommandTrainInfo: train : 3
+05/13/2016 15:10:02: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3
+
+05/13/2016 15:10:02: ##############################################################################
+05/13/2016 15:10:02: #                                                                            #
+05/13/2016 15:10:02: # Action "train"                                                             #
+05/13/2016 15:10:02: #                                                                            #
+05/13/2016 15:10:02: ##############################################################################
+
+05/13/2016 15:10:02: CNTKCommandTrainBegin: train
+NDLBuilder Using GPU 0
+
+05/13/2016 15:10:02: Creating virgin network.
+
+Post-processing network...
+
+4 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	errTop1 = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 17 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 200]
+Validating --> h1.W = LearnableParameter() :  -> [200 x 784]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [784 x *]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [784 x *] -> [784 x 1 x *]
+Validating --> h1.t = Times (h1.W, featScaled) : [200 x 784], [784 x 1 x *] -> [200 x 1 x *]
+Validating --> h1.b = LearnableParameter() :  -> [200 x 1]
+Validating --> h1.z = Plus (h1.t, h1.b) : [200 x 1 x *], [200 x 1] -> [200 x 1 x *]
+Validating --> h1.y = Sigmoid (h1.z) : [200 x 1 x *] -> [200 x 1 x *]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *] -> [10 x 1 x *]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> unnamed81 = LearnableParameter() :  -> [1 x 1]
+Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
+
+Validating network. 9 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+
+9 out of 17 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+05/13/2016 15:10:02: Created model with 17 nodes on GPU 0.
+
+05/13/2016 15:10:02: Training criterion node(s):
+05/13/2016 15:10:02: 	ce = CrossEntropyWithSoftmax
+
+05/13/2016 15:10:02: Evaluation criterion node(s):
+
+05/13/2016 15:10:02: 	errTop1 = ErrorPrediction
+05/13/2016 15:10:02: 	err = ErrorPrediction
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+(nil): {[err Gradient[1]] [errTop1 Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[784 x 1 x *]] [features Gradient[784 x *]] [labels Gradient[10 x *]] [unnamed81 Gradient[1 x 1]] }
+0x1bde9d8: {[errTop1 Value[1]] }
+0x1bdeb98: {[err Value[1]] }
+0x1be1e38: {[features Value[784 x *]] }
+0x2447ab8: {[featScale Value[1 x 1]] }
+0x2448c28: {[labels Value[10 x *]] }
+0x2449368: {[h1.W Value[200 x 784]] }
+0x29577e8: {[h1.b Value[200 x 1]] }
+0x2958938: {[ol.W Value[10 x 200]] }
+0x2959808: {[ol.b Value[10 x 1]] }
+0x295b198: {[unnamed81 Value[1 x 1]] }
+0x295ece8: {[featScaled Value[784 x 1 x *]] }
+0x295ef48: {[ol.z Value[10 x 1 x *]] }
+0x295f108: {[ce Value[1]] }
+0x29609d8: {[h1.t Value[200 x 1 x *]] }
+0x2960d88: {[h1.W Gradient[200 x 784]] [h1.z Value[200 x 1 x *]] }
+0x2960ee8: {[h1.t Gradient[200 x 1 x *]] [h1.y Value[200 x 1 x *]] }
+0x2961048: {[h1.z Gradient[200 x 1 x *]] [ol.t Value[10 x 1 x *]] }
+0x2961fa8: {[ce Gradient[1]] }
+0x2962168: {[ol.W Gradient[10 x 200]] [ol.z Gradient[10 x 1 x *]] }
+0x2962328: {[ol.t Gradient[10 x 1 x *]] }
+0x29624e8: {[ol.b Gradient[10 x 1]] }
+0x29626a8: {[h1.b Gradient[200 x 1]] [h1.y Gradient[200 x 1 x *]] }
+
+05/13/2016 15:10:02: No PreCompute nodes found, skipping PreCompute step.
+
+05/13/2016 15:10:02: Starting Epoch 1: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 15:10:02: Starting minibatch loop.
+05/13/2016 15:10:03:  Epoch[ 1 of 3]-Minibatch[   1- 500, 26.67%]: ce = 1.30072449 * 16000; errTop1 = 0.38468750 * 16000; err = 0.38468750 * 16000; time = 1.2825s; samplesPerSecond = 12475.2
+05/13/2016 15:10:04:  Epoch[ 1 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.49017273 * 16000; errTop1 = 0.13037500 * 16000; err = 0.13037500 * 16000; time = 0.2861s; samplesPerSecond = 55923.1
+05/13/2016 15:10:04:  Epoch[ 1 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.39744922 * 16000; errTop1 = 0.11168750 * 16000; err = 0.11168750 * 16000; time = 0.2889s; samplesPerSecond = 55389.2
+05/13/2016 15:10:04: Finished Epoch[ 1 of 3]: [Training] ce = 0.65501042 * 60000; errTop1 = 0.18685000 * 60000; err = 0.18685000 * 60000; totalSamplesSeen = 60000; learningRatePerSample = 0.003125; epochTime=2.09125s
+05/13/2016 15:10:04: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/Models/01_OneHidden.1'
+
+05/13/2016 15:10:04: Starting Epoch 2: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 15:10:04: Starting minibatch loop.
+05/13/2016 15:10:04:  Epoch[ 2 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.32870679 * 16000; errTop1 = 0.09531250 * 16000; err = 0.09531250 * 16000; time = 0.2809s; samplesPerSecond = 56955.1
+05/13/2016 15:10:05:  Epoch[ 2 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.31809930 * 16000; errTop1 = 0.09206250 * 16000; err = 0.09206250 * 16000; time = 0.2862s; samplesPerSecond = 55905.9
+05/13/2016 15:10:05:  Epoch[ 2 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.31002502 * 16000; errTop1 = 0.08762500 * 16000; err = 0.08762500 * 16000; time = 0.2946s; samplesPerSecond = 54305.4
+05/13/2016 15:10:05: Finished Epoch[ 2 of 3]: [Training] ce = 0.31494245 * 60000; errTop1 = 0.09090000 * 60000; err = 0.09090000 * 60000; totalSamplesSeen = 120000; learningRatePerSample = 0.003125; epochTime=1.08973s
+05/13/2016 15:10:05: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/Models/01_OneHidden.2'
+
+05/13/2016 15:10:06: Starting Epoch 3: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 15:10:06: Starting minibatch loop.
+05/13/2016 15:10:06:  Epoch[ 3 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.28016867 * 16000; errTop1 = 0.08187500 * 16000; err = 0.08187500 * 16000; time = 0.2894s; samplesPerSecond = 55283.2
+05/13/2016 15:10:06:  Epoch[ 3 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.28037985 * 16000; errTop1 = 0.08093750 * 16000; err = 0.08093750 * 16000; time = 0.2860s; samplesPerSecond = 55935.8
+05/13/2016 15:10:06:  Epoch[ 3 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.27621069 * 16000; errTop1 = 0.08237500 * 16000; err = 0.08237500 * 16000; time = 0.2791s; samplesPerSecond = 57323.8
+05/13/2016 15:10:07: Finished Epoch[ 3 of 3]: [Training] ce = 0.27476087 * 60000; errTop1 = 0.08011667 * 60000; err = 0.08011667 * 60000; totalSamplesSeen = 180000; learningRatePerSample = 0.003125; epochTime=1.07334s
+05/13/2016 15:10:07: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_01_OneHidden@release_gpu/Models/01_OneHidden'
+05/13/2016 15:10:07: CNTKCommandTrainEnd: train
+
+05/13/2016 15:10:07: Action "train" complete.
+
+
+05/13/2016 15:10:07: ##############################################################################
+05/13/2016 15:10:07: #                                                                            #
+05/13/2016 15:10:07: # Action "test"                                                              #
+05/13/2016 15:10:07: #                                                                            #
+05/13/2016 15:10:07: ##############################################################################
+
+
+Post-processing network...
+
+4 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	errTop1 = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 17 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *1]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 200]
+Validating --> h1.W = LearnableParameter() :  -> [200 x 784]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [784 x *1]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [784 x *1] -> [784 x 1 x *1]
+Validating --> h1.t = Times (h1.W, featScaled) : [200 x 784], [784 x 1 x *1] -> [200 x 1 x *1]
+Validating --> h1.b = LearnableParameter() :  -> [200 x 1]
+Validating --> h1.z = Plus (h1.t, h1.b) : [200 x 1 x *1], [200 x 1] -> [200 x 1 x *1]
+Validating --> h1.y = Sigmoid (h1.z) : [200 x 1 x *1] -> [200 x 1 x *1]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *1] -> [10 x 1 x *1]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> unnamed81 = LearnableParameter() :  -> [1 x 1]
+Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
+
+Validating network. 9 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+
+9 out of 17 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+(nil): {[ce Gradient[1]] [err Gradient[1]] [errTop1 Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[784 x 1 x *1]] [features Gradient[784 x *1]] [h1.W Gradient[200 x 784]] [h1.b Gradient[200 x 1]] [h1.t Gradient[200 x 1 x *1]] [h1.y Gradient[200 x 1 x *1]] [h1.z Gradient[200 x 1 x *1]] [labels Gradient[10 x *1]] [ol.W Gradient[10 x 200]] [ol.b Gradient[10 x 1]] [ol.t Gradient[10 x 1 x *1]] [ol.z Gradient[10 x 1 x *1]] [unnamed81 Gradient[1 x 1]] }
+0x7f0d2f269e18: {[labels Value[10 x *1]] }
+0x7f0d2f26a4c8: {[ol.b Value[10 x 1]] }
+0x7f0d2f26b578: {[ol.W Value[10 x 200]] }
+0x7f0d2f26bd18: {[unnamed81 Value[1 x 1]] }
+0x7f0d2f270658: {[errTop1 Value[1]] }
+0x7f0d2f270818: {[err Value[1]] }
+0x7f0d2f2709d8: {[ce Value[1]] }
+0x7f0d2f270f28: {[h1.t Value[200 x 1 x *1]] }
+0x7f0d2f2720b8: {[featScaled Value[784 x 1 x *1]] }
+0x7f0d2f272588: {[h1.z Value[200 x 1 x *1]] }
+0x7f0d2f272748: {[h1.y Value[200 x 1 x *1]] }
+0x7f0d2f272908: {[ol.t Value[10 x 1 x *1]] }
+0x7f0d2f272ac8: {[ol.z Value[10 x 1 x *1]] }
+0x7f0d35693b68: {[featScale Value[1 x 1]] }
+0x7f0d4bd02258: {[h1.b Value[200 x 1]] }
+0x7f0d4bd02f98: {[features Value[784 x *1]] }
+0x7f0d4bd03c78: {[h1.W Value[200 x 784]] }
+
+05/13/2016 15:10:10: Final Results: Minibatch[1-625]: errTop1 = 0.07140000 * 10000; err = 0.07140000 * 10000; ce = 0.25287636 * 10000; perplexity = 1.28772405
+
+05/13/2016 15:10:10: Action "test" complete.
+
+05/13/2016 15:10:10: __COMPLETED__
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/baseline.windows.txt
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/baseline.windows.txt
@ -0,0 +1,456 @@
+=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/01_OneHidden.cntk currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu DeviceId=0 timestamping=true train=[SGD=[maxEpochs=3]] imageLayout="cudnn"
+-------------------------------------------------------------------
+Build info: 
+
+		Built time: May 13 2016 08:06:01
+		Last modified date: Thu May 12 07:31:50 2016
+		Build type: Release
+		Build target: GPU
+		With 1bit-SGD: no
+		CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
+		CUB_PATH: c:\src\cub-1.4.1
+		CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
+		Build Branch: HEAD
+		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+		Built by svcphil on Philly-Pool3
+		Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
+-------------------------------------------------------------------
+Changed current directory to C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData
+05/13/2016 08:15:51: -------------------------------------------------------------------
+05/13/2016 08:15:51: Build info: 
+
+05/13/2016 08:15:51: 		Built time: May 13 2016 08:06:01
+05/13/2016 08:15:51: 		Last modified date: Thu May 12 07:31:50 2016
+05/13/2016 08:15:51: 		Build type: Release
+05/13/2016 08:15:51: 		Build target: GPU
+05/13/2016 08:15:51: 		With 1bit-SGD: no
+05/13/2016 08:15:51: 		CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
+05/13/2016 08:15:51: 		CUB_PATH: c:\src\cub-1.4.1
+05/13/2016 08:15:51: 		CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
+05/13/2016 08:15:51: 		Build Branch: HEAD
+05/13/2016 08:15:51: 		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+05/13/2016 08:15:51: 		Built by svcphil on Philly-Pool3
+05/13/2016 08:15:51: 		Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
+05/13/2016 08:15:51: -------------------------------------------------------------------
+
+05/13/2016 08:15:51: Running on Philly-Pool2 at 2016/05/13 08:15:51
+05/13/2016 08:15:51: Command line: 
+C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe  configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/01_OneHidden.cntk  currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData  RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu  DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData  ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config  OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu  DeviceId=0  timestamping=true  train=[SGD=[maxEpochs=3]]  imageLayout="cudnn"
+
+
+
+05/13/2016 08:15:51: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
+05/13/2016 08:15:51: RootDir = ".."
+ConfigDir = "$RootDir$/Config"
+DataDir   = "$RootDir$/Data"
+OutputDir = "$RootDir$/Output"
+ModelDir  = "$OutputDir$/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "$ModelDir$/01_OneHidden"
+ndlMacros = "$ConfigDir$/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "$ConfigDir$/01_OneHidden.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1
+        momentumPerMB = 0
+        maxEpochs = 30
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]   
+]
+test = [
+    action = "test"
+    minibatchSize = 16
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu
+DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData
+ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config
+OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 08:15:51: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED)  <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 08:15:51: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+05/13/2016 08:15:51: RootDir = ".."
+ConfigDir = "../Config"
+DataDir   = "../Data"
+OutputDir = "../Output"
+ModelDir  = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu/Models/01_OneHidden"
+ndlMacros = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/01_OneHidden.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1
+        momentumPerMB = 0
+        maxEpochs = 30
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]   
+]
+test = [
+    action = "test"
+    minibatchSize = 16
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu
+DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData
+ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config
+OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 08:15:51: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 08:15:51: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+configparameters: 01_OneHidden.cntk:command=train:test
+configparameters: 01_OneHidden.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config
+configparameters: 01_OneHidden.cntk:currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData
+configparameters: 01_OneHidden.cntk:DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData
+configparameters: 01_OneHidden.cntk:deviceId=0
+configparameters: 01_OneHidden.cntk:imageLayout=cudnn
+configparameters: 01_OneHidden.cntk:initOnCPUOnly=true
+configparameters: 01_OneHidden.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu/Models
+configparameters: 01_OneHidden.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu/Models/01_OneHidden
+configparameters: 01_OneHidden.cntk:ndlMacros=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/Macros.ndl
+configparameters: 01_OneHidden.cntk:numMBsToShowResult=500
+configparameters: 01_OneHidden.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu
+configparameters: 01_OneHidden.cntk:precision=float
+configparameters: 01_OneHidden.cntk:RootDir=..
+configparameters: 01_OneHidden.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu
+configparameters: 01_OneHidden.cntk:test=[
+    action = "test"
+    minibatchSize = 16
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+
+configparameters: 01_OneHidden.cntk:timestamping=true
+configparameters: 01_OneHidden.cntk:traceLevel=1
+configparameters: 01_OneHidden.cntk:train=[
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/01_OneHidden.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1
+        momentumPerMB = 0
+        maxEpochs = 30
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu\TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]   
+] [SGD=[maxEpochs=3]]
+
+05/13/2016 08:15:51: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+05/13/2016 08:15:51: Commands: train test
+05/13/2016 08:15:51: Precision = "float"
+05/13/2016 08:15:51: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu/Models/01_OneHidden
+05/13/2016 08:15:51: CNTKCommandTrainInfo: train : 3
+05/13/2016 08:15:51: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3
+
+05/13/2016 08:15:51: ##############################################################################
+05/13/2016 08:15:51: #                                                                            #
+05/13/2016 08:15:51: # Action "train"                                                             #
+05/13/2016 08:15:51: #                                                                            #
+05/13/2016 08:15:51: ##############################################################################
+
+05/13/2016 08:15:51: CNTKCommandTrainBegin: train
+NDLBuilder Using GPU 0
+
+05/13/2016 08:15:52: Creating virgin network.
+
+Post-processing network...
+
+4 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	errTop1 = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 17 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 200]
+Validating --> h1.W = LearnableParameter() :  -> [200 x 784]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [784 x *]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [784 x *] -> [784 x 1 x *]
+Validating --> h1.t = Times (h1.W, featScaled) : [200 x 784], [784 x 1 x *] -> [200 x 1 x *]
+Validating --> h1.b = LearnableParameter() :  -> [200 x 1]
+Validating --> h1.z = Plus (h1.t, h1.b) : [200 x 1 x *], [200 x 1] -> [200 x 1 x *]
+Validating --> h1.y = Sigmoid (h1.z) : [200 x 1 x *] -> [200 x 1 x *]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *] -> [10 x 1 x *]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> unnamed81 = LearnableParameter() :  -> [1 x 1]
+Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
+
+Validating network. 9 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+
+9 out of 17 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+05/13/2016 08:15:53: Created model with 17 nodes on GPU 0.
+
+05/13/2016 08:15:53: Training criterion node(s):
+05/13/2016 08:15:53: 	ce = CrossEntropyWithSoftmax
+
+05/13/2016 08:15:53: Evaluation criterion node(s):
+
+05/13/2016 08:15:53: 	errTop1 = ErrorPrediction
+05/13/2016 08:15:53: 	err = ErrorPrediction
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+0000000000000000: {[err Gradient[1]] [errTop1 Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[784 x 1 x *]] [features Gradient[784 x *]] [labels Gradient[10 x *]] [unnamed81 Gradient[1 x 1]] }
+000000780D2D93A0: {[unnamed81 Value[1 x 1]] }
+000000780D2D9440: {[featScaled Value[784 x 1 x *]] }
+000000780D2D94E0: {[h1.W Gradient[200 x 784]] [h1.z Value[200 x 1 x *]] }
+000000780D2D9580: {[h1.t Gradient[200 x 1 x *]] [h1.y Value[200 x 1 x *]] }
+000000780D2D9620: {[h1.z Gradient[200 x 1 x *]] [ol.t Value[10 x 1 x *]] }
+000000780D2D96C0: {[ol.W Value[10 x 200]] }
+000000780D2D9760: {[ol.b Value[10 x 1]] }
+000000780D2D99E0: {[errTop1 Value[1]] }
+000000780D2D9EE0: {[err Value[1]] }
+000000780D2DA0C0: {[ol.z Value[10 x 1 x *]] }
+000000780D2DA160: {[ce Value[1]] }
+000000780D2DA2A0: {[h1.t Value[200 x 1 x *]] }
+000000780D33AB50: {[ce Gradient[1]] }
+000000780D33ABF0: {[ol.t Gradient[10 x 1 x *]] }
+000000780D33AFB0: {[ol.b Gradient[10 x 1]] }
+000000780D33C270: {[h1.b Gradient[200 x 1]] [h1.y Gradient[200 x 1 x *]] }
+000000780D33C9F0: {[ol.W Gradient[10 x 200]] [ol.z Gradient[10 x 1 x *]] }
+00000078767789E0: {[featScale Value[1 x 1]] }
+0000007876778A80: {[labels Value[10 x *]] }
+0000007876778B20: {[h1.W Value[200 x 784]] }
+0000007876778BC0: {[h1.b Value[200 x 1]] }
+000000787677A1A0: {[features Value[784 x *]] }
+
+05/13/2016 08:15:53: No PreCompute nodes found, skipping PreCompute step.
+
+05/13/2016 08:15:53: Starting Epoch 1: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 08:15:53: Starting minibatch loop.
+05/13/2016 08:15:56:  Epoch[ 1 of 3]-Minibatch[   1- 500, 26.67%]: ce = 1.29023352 * 16000; errTop1 = 0.37981250 * 16000; err = 0.37981250 * 16000; time = 3.1210s; samplesPerSecond = 5126.5
+05/13/2016 08:15:57:  Epoch[ 1 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.50742346 * 16000; errTop1 = 0.13900000 * 16000; err = 0.13900000 * 16000; time = 0.6202s; samplesPerSecond = 25796.5
+05/13/2016 08:15:57:  Epoch[ 1 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.39415479 * 16000; errTop1 = 0.11081250 * 16000; err = 0.11081250 * 16000; time = 0.6195s; samplesPerSecond = 25828.0
+05/13/2016 08:15:58: Finished Epoch[ 1 of 3]: [Training] ce = 0.65521146 * 60000; errTop1 = 0.18846667 * 60000; err = 0.18846667 * 60000; totalSamplesSeen = 60000; learningRatePerSample = 0.003125; epochTime=4.86409s
+05/13/2016 08:15:58: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu/Models/01_OneHidden.1'
+
+05/13/2016 08:15:58: Starting Epoch 2: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 08:15:58: Starting minibatch loop.
+05/13/2016 08:15:59:  Epoch[ 2 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.33321408 * 16000; errTop1 = 0.09581250 * 16000; err = 0.09581250 * 16000; time = 0.6590s; samplesPerSecond = 24277.8
+05/13/2016 08:15:59:  Epoch[ 2 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.31547781 * 16000; errTop1 = 0.09287500 * 16000; err = 0.09287500 * 16000; time = 0.6704s; samplesPerSecond = 23866.0
+05/13/2016 08:16:00:  Epoch[ 2 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.31882251 * 16000; errTop1 = 0.09218750 * 16000; err = 0.09218750 * 16000; time = 0.6720s; samplesPerSecond = 23808.7
+05/13/2016 08:16:00: Finished Epoch[ 2 of 3]: [Training] ce = 0.31533239 * 60000; errTop1 = 0.09158333 * 60000; err = 0.09158333 * 60000; totalSamplesSeen = 120000; learningRatePerSample = 0.003125; epochTime=2.52448s
+05/13/2016 08:16:00: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu/Models/01_OneHidden.2'
+
+05/13/2016 08:16:00: Starting Epoch 3: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 08:16:00: Starting minibatch loop.
+05/13/2016 08:16:01:  Epoch[ 3 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.28751190 * 16000; errTop1 = 0.08393750 * 16000; err = 0.08393750 * 16000; time = 0.6195s; samplesPerSecond = 25825.2
+05/13/2016 08:16:02:  Epoch[ 3 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.27455951 * 16000; errTop1 = 0.07950000 * 16000; err = 0.07950000 * 16000; time = 0.6193s; samplesPerSecond = 25834.3
+05/13/2016 08:16:02:  Epoch[ 3 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.27693610 * 16000; errTop1 = 0.07987500 * 16000; err = 0.07987500 * 16000; time = 0.6192s; samplesPerSecond = 25839.8
+05/13/2016 08:16:03: Finished Epoch[ 3 of 3]: [Training] ce = 0.27493141 * 60000; errTop1 = 0.07983333 * 60000; err = 0.07983333 * 60000; totalSamplesSeen = 180000; learningRatePerSample = 0.003125; epochTime=2.34147s
+05/13/2016 08:16:03: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_01_OneHidden@release_gpu/Models/01_OneHidden'
+05/13/2016 08:16:03: CNTKCommandTrainEnd: train
+
+05/13/2016 08:16:03: Action "train" complete.
+
+
+05/13/2016 08:16:03: ##############################################################################
+05/13/2016 08:16:03: #                                                                            #
+05/13/2016 08:16:03: # Action "test"                                                              #
+05/13/2016 08:16:03: #                                                                            #
+05/13/2016 08:16:03: ##############################################################################
+
+
+Post-processing network...
+
+4 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	errTop1 = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 17 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *1]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 200]
+Validating --> h1.W = LearnableParameter() :  -> [200 x 784]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [784 x *1]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [784 x *1] -> [784 x 1 x *1]
+Validating --> h1.t = Times (h1.W, featScaled) : [200 x 784], [784 x 1 x *1] -> [200 x 1 x *1]
+Validating --> h1.b = LearnableParameter() :  -> [200 x 1]
+Validating --> h1.z = Plus (h1.t, h1.b) : [200 x 1 x *1], [200 x 1] -> [200 x 1 x *1]
+Validating --> h1.y = Sigmoid (h1.z) : [200 x 1 x *1] -> [200 x 1 x *1]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *1] -> [10 x 1 x *1]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> unnamed81 = LearnableParameter() :  -> [1 x 1]
+Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
+
+Validating network. 9 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+
+9 out of 17 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+0000000000000000: {[ce Gradient[1]] [err Gradient[1]] [errTop1 Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[784 x 1 x *1]] [features Gradient[784 x *1]] [h1.W Gradient[200 x 784]] [h1.b Gradient[200 x 1]] [h1.t Gradient[200 x 1 x *1]] [h1.y Gradient[200 x 1 x *1]] [h1.z Gradient[200 x 1 x *1]] [labels Gradient[10 x *1]] [ol.W Gradient[10 x 200]] [ol.b Gradient[10 x 1]] [ol.t Gradient[10 x 1 x *1]] [ol.z Gradient[10 x 1 x *1]] [unnamed81 Gradient[1 x 1]] }
+000000780D33B230: {[labels Value[10 x *1]] }
+000000780D33BA50: {[ol.b Value[10 x 1]] }
+000000780D33BD70: {[featScale Value[1 x 1]] }
+000000780D33BF50: {[h1.b Value[200 x 1]] }
+000000780D33C6D0: {[features Value[784 x *1]] }
+000000780D33C770: {[h1.W Value[200 x 784]] }
+000000787673E350: {[ol.z Value[10 x 1 x *1]] }
+000000787673E850: {[ol.t Value[10 x 1 x *1]] }
+00000078767789E0: {[ol.W Value[10 x 200]] }
+0000007876778A80: {[unnamed81 Value[1 x 1]] }
+0000007876779020: {[errTop1 Value[1]] }
+00000078767790C0: {[err Value[1]] }
+0000007876779160: {[ce Value[1]] }
+00000078767792A0: {[h1.t Value[200 x 1 x *1]] }
+00000078767793E0: {[h1.z Value[200 x 1 x *1]] }
+00000078767795C0: {[h1.y Value[200 x 1 x *1]] }
+00000078767797A0: {[featScaled Value[784 x 1 x *1]] }
+
+05/13/2016 08:16:11: Final Results: Minibatch[1-625]: errTop1 = 0.07460000 * 10000; err = 0.07460000 * 10000; ce = 0.26425332 * 10000; perplexity = 1.30245809
+
+05/13/2016 08:16:11: Action "test" complete.
+
+05/13/2016 08:16:11: __COMPLETED__
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/run-test
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/run-test
@ -0,0 +1,11 @@
+#!/bin/bash
+
+. $TEST_DIR/../run-test-common
+
+cntkrun  $CNTKTextConfigDir/01_OneHidden.cntk "train=[SGD=[maxEpochs=3]] imageLayout=\"$imageLayout\""
+ExitCode=$?
+
+# Delete the test data if copied
+[[ "$Copied" -eq "1" ]] && rm -rf "$DataDir"
+
+exit $ExitCode
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/testcases.yml
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/01_OneHidden/testcases.yml
@ -0,0 +1,41 @@
+dataDir: ../../../../../../../Examples/Image/MNIST/Data
+
+tags:
+    # In BVT, run Release GPU
+    - bvt-e (build_sku=='gpu') and (device=='gpu') and (flavor=='release') 
+    # In Nightly, run Release GPU on every OS; on Linux, additionally run Debug GPU and Release CPU (GPU build only)
+    - nightly-e (build_sku=='gpu') and (((device=='gpu') and (flavor=='release')) or (os=='linux' and ((flavor=='debug') ^ (device=='cpu'))))
+
+testCases:
+  CNTK Run must be completed:
+    patterns:
+      - __COMPLETED__
+
+  Must train epochs in exactly same order and parameters:
+    patterns:
+      - Starting Epoch {{integer}}
+      - learning rate per sample = {{float}}
+      - momentum = {{float}}
+
+  Epochs must be finished with expected results:
+    patterns:
+      - Finished Epoch[{{integer}} of {{integer}}]
+      - ce = {{float,tolerance=.1%}} * {{integer}}
+      - errTop1 = {{float,tolerance=.1%}} * {{integer}}
+      - err = {{float,tolerance=.1%}} * {{integer}}
+      - totalSamplesSeen = {{integer}}
+      - learningRatePerSample = {{float,tolerance=0.001%}}
+
+  Per-minibatch training results must match:
+    patterns:
+      - Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}}
+      - ce = {{float,tolerance=.1%}} * {{integer}}
+      - errTop1 = {{float,tolerance=.1%}} * {{integer}}
+      - err = {{float,tolerance=.1%}} * {{integer}}
+
+  Final test results must match:
+    patterns:
+      - "Final Results: Minibatch[{{integer}}-{{integer}}]"
+      - errTop1 = {{float,tolerance=.1%}} * {{integer}}
+      - err = {{float,tolerance=.1%}} * {{integer}}
+      - ce = {{float,tolerance=.1%}} * {{integer}}
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/README.txt
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/README.txt
@ -0,0 +1,9 @@
+Test runtimes
+
+Windows:
+Running test CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution (debug gpu) - [OK] 157.53 sec
+Running test CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution (release gpu) - [OK] 64.93 sec
+
+Linux:
+Running test CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution (debug gpu) - [OK] 40.43 sec
+Running test CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution (release gpu) - [OK] 18.92 sec
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/baseline.linux.txt
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/baseline.linux.txt
@ -0,0 +1,524 @@
+=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/02_Convolution.cntk currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu DeviceId=0 timestamping=true train=[SGD=[maxEpochs=3]] imageLayout="cudnn"
+-------------------------------------------------------------------
+Build info: 
+
+		Built time: May 13 2016 14:50:25
+		Last modified date: Thu May 12 14:00:37 2016
+		Build type: release
+		Build target: GPU
+		With 1bit-SGD: no
+		Math lib: acml
+		CUDA_PATH: /usr/local/cuda-7.5
+		CUB_PATH: /usr/local/cub-1.4.1
+		CUDNN_PATH: /usr/local/cudnn-4.0
+		Build Branch: HEAD
+		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+		Built by philly on d8dc82703b0f
+		Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
+-------------------------------------------------------------------
+Changed current directory to /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData
+05/13/2016 15:10:11: -------------------------------------------------------------------
+05/13/2016 15:10:11: Build info: 
+
+05/13/2016 15:10:11: 		Built time: May 13 2016 14:50:25
+05/13/2016 15:10:11: 		Last modified date: Thu May 12 14:00:37 2016
+05/13/2016 15:10:11: 		Build type: release
+05/13/2016 15:10:11: 		Build target: GPU
+05/13/2016 15:10:11: 		With 1bit-SGD: no
+05/13/2016 15:10:11: 		Math lib: acml
+05/13/2016 15:10:11: 		CUDA_PATH: /usr/local/cuda-7.5
+05/13/2016 15:10:11: 		CUB_PATH: /usr/local/cub-1.4.1
+05/13/2016 15:10:11: 		CUDNN_PATH: /usr/local/cudnn-4.0
+05/13/2016 15:10:11: 		Build Branch: HEAD
+05/13/2016 15:10:11: 		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+05/13/2016 15:10:11: 		Built by philly on d8dc82703b0f
+05/13/2016 15:10:11: 		Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
+05/13/2016 15:10:11: -------------------------------------------------------------------
+
+05/13/2016 15:10:11: Running on localhost at 2016/05/13 15:10:11
+05/13/2016 15:10:11: Command line: 
+/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk  configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/02_Convolution.cntk  currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData  RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu  DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData  ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config  OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu  DeviceId=0  timestamping=true  train=[SGD=[maxEpochs=3]]  imageLayout="cudnn"
+
+
+
+05/13/2016 15:10:11: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
+05/13/2016 15:10:11: RootDir = ".."
+ConfigDir = "$RootDir$/Config"
+DataDir   = "$RootDir$/Data"
+OutputDir = "$RootDir$/Output"
+ModelDir  = "$OutputDir$/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "$ModelDir$/02_Convolution"
+ndlMacros = "$ConfigDir$/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+prefetch=true
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "$ConfigDir$/02_Convolution.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1*5:0.3
+        momentumPerMB = 0*10:0.7
+        maxEpochs = 15
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+test = [
+    action = test
+    minibatchSize = 16
+    NDLNetworkBuilder = [
+        networkDescription = "$ConfigDir$/02_Convolution.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData
+RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu
+DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData
+ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config
+OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 15:10:11: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED)  <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 15:10:11: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+05/13/2016 15:10:11: RootDir = ".."
+ConfigDir = "../Config"
+DataDir   = "../Data"
+OutputDir = "../Output"
+ModelDir  = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/Models/02_Convolution"
+ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+prefetch=true
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config/02_Convolution.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1*5:0.3
+        momentumPerMB = 0*10:0.7
+        maxEpochs = 15
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+test = [
+    action = test
+    minibatchSize = 16
+    NDLNetworkBuilder = [
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config/02_Convolution.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData
+RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu
+DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData
+ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config
+OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 15:10:11: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 15:10:11: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+configparameters: 02_Convolution.cntk:command=train:test
+configparameters: 02_Convolution.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config
+configparameters: 02_Convolution.cntk:currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData
+configparameters: 02_Convolution.cntk:DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData
+configparameters: 02_Convolution.cntk:deviceId=0
+configparameters: 02_Convolution.cntk:imageLayout=cudnn
+configparameters: 02_Convolution.cntk:initOnCPUOnly=true
+configparameters: 02_Convolution.cntk:ModelDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/Models
+configparameters: 02_Convolution.cntk:modelPath=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/Models/02_Convolution
+configparameters: 02_Convolution.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config/Macros.ndl
+configparameters: 02_Convolution.cntk:numMBsToShowResult=500
+configparameters: 02_Convolution.cntk:OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu
+configparameters: 02_Convolution.cntk:precision=float
+configparameters: 02_Convolution.cntk:prefetch=true
+configparameters: 02_Convolution.cntk:RootDir=..
+configparameters: 02_Convolution.cntk:RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu
+configparameters: 02_Convolution.cntk:test=[
+    action = test
+    minibatchSize = 16
+    NDLNetworkBuilder = [
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config/02_Convolution.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+
+configparameters: 02_Convolution.cntk:timestamping=true
+configparameters: 02_Convolution.cntk:traceLevel=1
+configparameters: 02_Convolution.cntk:train=[
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/../../../../../../../Examples/Image/MNIST/Config/02_Convolution.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1*5:0.3
+        momentumPerMB = 0*10:0.7
+        maxEpochs = 15
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+] [SGD=[maxEpochs=3]]
+
+05/13/2016 15:10:11: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+05/13/2016 15:10:11: Commands: train test
+05/13/2016 15:10:11: Precision = "float"
+05/13/2016 15:10:11: CNTKModelPath: /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/Models/02_Convolution
+05/13/2016 15:10:11: CNTKCommandTrainInfo: train : 3
+05/13/2016 15:10:11: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3
+
+05/13/2016 15:10:11: ##############################################################################
+05/13/2016 15:10:11: #                                                                            #
+05/13/2016 15:10:11: # Action "train"                                                             #
+05/13/2016 15:10:11: #                                                                            #
+05/13/2016 15:10:11: ##############################################################################
+
+05/13/2016 15:10:11: CNTKCommandTrainBegin: train
+NDLBuilder Using GPU 0
+
+05/13/2016 15:10:11: Creating virgin network.
+
+Post-processing network...
+
+3 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 27 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 128]
+Validating --> h1.W = LearnableParameter() :  -> [128 x 7 x 7 x 32]
+Validating --> conv2.w.W = LearnableParameter() :  -> [32 x 400]
+Validating --> conv1.w.W = LearnableParameter() :  -> [16 x 25]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [28 x 28 x 1 x *]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [28 x 28 x 1 x *] -> [28 x 28 x 1 x *]
+Validating --> conv1.c.c = Convolution (conv1.w.W, featScaled) : [16 x 25], [28 x 28 x 1 x *] -> [28 x 28 x 16 x *]
+Validating --> conv1.b.b = LearnableParameter() :  -> [1 x 1 x 16]
+Validating --> conv1.cpb = Plus (conv1.c.c, conv1.b.b) : [28 x 28 x 16 x *], [1 x 1 x 16] -> [28 x 28 x 16 x *]
+Validating --> conv1.out = RectifiedLinear (conv1.cpb) : [28 x 28 x 16 x *] -> [28 x 28 x 16 x *]
+Validating --> pool1 = MaxPooling (conv1.out) : [28 x 28 x 16 x *] -> [14 x 14 x 16 x *]
+Validating --> conv2.c.c = Convolution (conv2.w.W, pool1) : [32 x 400], [14 x 14 x 16 x *] -> [14 x 14 x 32 x *]
+Validating --> conv2.b.b = LearnableParameter() :  -> [1 x 1 x 32]
+Validating --> conv2.cpb = Plus (conv2.c.c, conv2.b.b) : [14 x 14 x 32 x *], [1 x 1 x 32] -> [14 x 14 x 32 x *]
+Validating --> conv2.out = RectifiedLinear (conv2.cpb) : [14 x 14 x 32 x *] -> [14 x 14 x 32 x *]
+Validating --> pool2.p = Pooling (conv2.out) : [14 x 14 x 32 x *] -> [7 x 7 x 32 x *]
+Validating --> h1.t = Times (h1.W, pool2.p) : [128 x 7 x 7 x 32], [7 x 7 x 32 x *] -> [128 x *]
+Validating --> h1.b = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.z = Plus (h1.t, h1.b) : [128 x *], [128 x 1] -> [128 x 1 x *]
+Validating --> h1.y = Sigmoid (h1.z) : [128 x 1 x *] -> [128 x 1 x *]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *] -> [10 x 1 x *]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+
+Validating network. 16 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 1, Output: 28 x 28 x 16, Kernel: 5 x 5 x 1, Map: 1 x 1 x 16, Stride: 1 x 1 x 1, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 16, Output: 14 x 14 x 16, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 16, Output: 14 x 14 x 32, Kernel: 5 x 5 x 16, Map: 32, Stride: 1 x 1 x 16, Sharing: (1, 1, 1), AutoPad: (1, 1, 0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 32, Output: 7 x 7 x 32, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (1, 1, 0), LowerPad: 0, UpperPad: 0.
+
+
+11 out of 27 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+05/13/2016 15:10:11: Created model with 27 nodes on GPU 0.
+
+05/13/2016 15:10:11: Training criterion node(s):
+05/13/2016 15:10:11: 	ce = CrossEntropyWithSoftmax
+
+05/13/2016 15:10:11: Evaluation criterion node(s):
+
+05/13/2016 15:10:11: 	err = ErrorPrediction
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+(nil): {[err Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[28 x 28 x 1 x *]] [features Gradient[28 x 28 x 1 x *]] [labels Gradient[10 x *]] }
+0x132d628: {[features Value[28 x 28 x 1 x *]] }
+0x1854138: {[featScale Value[1 x 1]] }
+0x1ca8388: {[labels Value[10 x *]] }
+0x1ca8b18: {[conv1.w.W Value[16 x 25]] }
+0x1ca9778: {[conv1.b.b Value[1 x 1 x 16]] }
+0x1caaa88: {[conv2.w.W Value[32 x 400]] }
+0x1cac278: {[conv2.b.b Value[1 x 1 x 32]] }
+0x1cb04f8: {[h1.W Value[128 x 7 x 7 x 32]] }
+0x1cb1728: {[h1.b Value[128 x 1]] }
+0x1cb2318: {[ol.W Value[10 x 128]] }
+0x1cb3468: {[ol.b Value[10 x 1]] }
+0x7f427f204c08: {[conv1.c.c Value[28 x 28 x 16 x *]] }
+0x7f427f20bd48: {[h1.b Gradient[128 x 1]] [h1.y Gradient[128 x 1 x *]] }
+0x7f427f4d3118: {[err Value[1]] }
+0x7f427f4e3b08: {[featScaled Value[28 x 28 x 1 x *]] }
+0x7f427f4e3db8: {[conv1.cpb Value[28 x 28 x 16 x *]] [conv1.w.W Gradient[16 x 25]] }
+0x7f427f4e42d8: {[conv1.c.c Gradient[28 x 28 x 16 x *]] [conv1.out Value[28 x 28 x 16 x *]] }
+0x7f427f4e4498: {[conv1.cpb Gradient[28 x 28 x 16 x *]] [pool1 Value[14 x 14 x 16 x *]] }
+0x7f427f4e4658: {[conv2.c.c Value[14 x 14 x 32 x *]] }
+0x7f427f4e4818: {[conv1.b.b Gradient[1 x 1 x 16]] [conv1.out Gradient[28 x 28 x 16 x *]] }
+0x7f427f4e49d8: {[conv2.cpb Value[14 x 14 x 32 x *]] [conv2.w.W Gradient[32 x 400]] }
+0x7f427f4e4b98: {[conv2.c.c Gradient[14 x 14 x 32 x *]] [conv2.out Value[14 x 14 x 32 x *]] }
+0x7f427f4e4d58: {[conv2.cpb Gradient[14 x 14 x 32 x *]] [pool1 Gradient[14 x 14 x 16 x *]] [pool2.p Value[7 x 7 x 32 x *]] }
+0x7f427f4e4f18: {[conv2.b.b Gradient[1 x 1 x 32]] [conv2.out Gradient[14 x 14 x 32 x *]] [h1.t Value[128 x *]] }
+0x7f427f4e50d8: {[h1.W Gradient[128 x 7 x 7 x 32]] [h1.z Value[128 x 1 x *]] }
+0x7f427f4e5298: {[h1.t Gradient[128 x *]] [h1.y Value[128 x 1 x *]] }
+0x7f427f4e5458: {[h1.z Gradient[128 x 1 x *]] [ol.t Value[10 x 1 x *]] [pool2.p Gradient[7 x 7 x 32 x *]] }
+0x7f427f4e5f38: {[ce Gradient[1]] }
+0x7f427f4e60f8: {[ol.W Gradient[10 x 128]] [ol.z Gradient[10 x 1 x *]] }
+0x7f427f4e62b8: {[ol.t Gradient[10 x 1 x *]] }
+0x7f427f4e6478: {[ol.b Gradient[10 x 1]] }
+0x7f427f4ff658: {[ce Value[1]] }
+0x7f427f4ffea8: {[ol.z Value[10 x 1 x *]] }
+
+05/13/2016 15:10:11: No PreCompute nodes found, skipping PreCompute step.
+
+05/13/2016 15:10:11: Starting Epoch 1: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 15:10:11: Starting minibatch loop.
+05/13/2016 15:10:13:  Epoch[ 1 of 3]-Minibatch[   1- 500, 26.67%]: ce = 1.05460791 * 16000; err = 0.35256250 * 16000; time = 2.0377s; samplesPerSecond = 7852.2
+05/13/2016 15:10:14:  Epoch[ 1 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.16176135 * 16000; err = 0.04425000 * 16000; time = 0.9884s; samplesPerSecond = 16187.9
+05/13/2016 15:10:15:  Epoch[ 1 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.10889783 * 16000; err = 0.03043750 * 16000; time = 0.9868s; samplesPerSecond = 16214.2
+05/13/2016 15:10:16: Finished Epoch[ 1 of 3]: [Training] ce = 0.37214827 * 60000; err = 0.11981667 * 60000; totalSamplesSeen = 60000; learningRatePerSample = 0.003125; epochTime=4.77593s
+05/13/2016 15:10:16: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/Models/02_Convolution.1'
+
+05/13/2016 15:10:16: Starting Epoch 2: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 15:10:16: Starting minibatch loop.
+05/13/2016 15:10:17:  Epoch[ 2 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.07178102 * 16000; err = 0.02206250 * 16000; time = 0.9982s; samplesPerSecond = 16029.6
+05/13/2016 15:10:18:  Epoch[ 2 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.06225394 * 16000; err = 0.01800000 * 16000; time = 0.9949s; samplesPerSecond = 16082.6
+05/13/2016 15:10:19:  Epoch[ 2 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.06624252 * 16000; err = 0.02025000 * 16000; time = 0.9961s; samplesPerSecond = 16062.5
+05/13/2016 15:10:19: Finished Epoch[ 2 of 3]: [Training] ce = 0.06652122 * 60000; err = 0.01995000 * 60000; totalSamplesSeen = 120000; learningRatePerSample = 0.003125; epochTime=3.74643s
+05/13/2016 15:10:20: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/Models/02_Convolution.2'
+
+05/13/2016 15:10:20: Starting Epoch 3: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 15:10:20: Starting minibatch loop.
+05/13/2016 15:10:21:  Epoch[ 3 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.04257084 * 16000; err = 0.01256250 * 16000; time = 0.9942s; samplesPerSecond = 16093.1
+05/13/2016 15:10:21:  Epoch[ 3 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.04675156 * 16000; err = 0.01418750 * 16000; time = 0.9927s; samplesPerSecond = 16118.2
+05/13/2016 15:10:22:  Epoch[ 3 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.04904524 * 16000; err = 0.01475000 * 16000; time = 0.9901s; samplesPerSecond = 16160.8
+05/13/2016 15:10:23: Finished Epoch[ 3 of 3]: [Training] ce = 0.04529028 * 60000; err = 0.01366667 * 60000; totalSamplesSeen = 180000; learningRatePerSample = 0.003125; epochTime=3.73418s
+05/13/2016 15:10:23: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_02_Convolution@release_gpu/Models/02_Convolution'
+05/13/2016 15:10:23: CNTKCommandTrainEnd: train
+
+05/13/2016 15:10:23: Action "train" complete.
+
+
+05/13/2016 15:10:23: ##############################################################################
+05/13/2016 15:10:23: #                                                                            #
+05/13/2016 15:10:23: # Action "test"                                                              #
+05/13/2016 15:10:23: #                                                                            #
+05/13/2016 15:10:23: ##############################################################################
+
+
+Post-processing network...
+
+3 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 27 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *1]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 128]
+Validating --> h1.W = LearnableParameter() :  -> [128 x 7 x 7 x 32]
+Validating --> conv2.w.W = LearnableParameter() :  -> [32 x 400]
+Validating --> conv1.w.W = LearnableParameter() :  -> [16 x 25]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [28 x 28 x 1 x *1]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [28 x 28 x 1 x *1] -> [28 x 28 x 1 x *1]
+Validating --> conv1.c.c = Convolution (conv1.w.W, featScaled) : [16 x 25], [28 x 28 x 1 x *1] -> [28 x 28 x 16 x *1]
+Validating --> conv1.b.b = LearnableParameter() :  -> [1 x 1 x 16]
+Validating --> conv1.cpb = Plus (conv1.c.c, conv1.b.b) : [28 x 28 x 16 x *1], [1 x 1 x 16] -> [28 x 28 x 16 x *1]
+Validating --> conv1.out = RectifiedLinear (conv1.cpb) : [28 x 28 x 16 x *1] -> [28 x 28 x 16 x *1]
+Validating --> pool1 = MaxPooling (conv1.out) : [28 x 28 x 16 x *1] -> [14 x 14 x 16 x *1]
+Validating --> conv2.c.c = Convolution (conv2.w.W, pool1) : [32 x 400], [14 x 14 x 16 x *1] -> [14 x 14 x 32 x *1]
+Validating --> conv2.b.b = LearnableParameter() :  -> [1 x 1 x 32]
+Validating --> conv2.cpb = Plus (conv2.c.c, conv2.b.b) : [14 x 14 x 32 x *1], [1 x 1 x 32] -> [14 x 14 x 32 x *1]
+Validating --> conv2.out = RectifiedLinear (conv2.cpb) : [14 x 14 x 32 x *1] -> [14 x 14 x 32 x *1]
+Validating --> pool2.p = Pooling (conv2.out) : [14 x 14 x 32 x *1] -> [7 x 7 x 32 x *1]
+Validating --> h1.t = Times (h1.W, pool2.p) : [128 x 7 x 7 x 32], [7 x 7 x 32 x *1] -> [128 x *1]
+Validating --> h1.b = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.z = Plus (h1.t, h1.b) : [128 x *1], [128 x 1] -> [128 x 1 x *1]
+Validating --> h1.y = Sigmoid (h1.z) : [128 x 1 x *1] -> [128 x 1 x *1]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *1] -> [10 x 1 x *1]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+
+Validating network. 16 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 1, Output: 28 x 28 x 16, Kernel: 5 x 5 x 1, Map: 1 x 1 x 16, Stride: 1 x 1 x 1, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 16, Output: 14 x 14 x 16, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 16, Output: 14 x 14 x 32, Kernel: 5 x 5 x 16, Map: 32, Stride: 1 x 1 x 16, Sharing: (1, 1, 1), AutoPad: (1, 1, 0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 32, Output: 7 x 7 x 32, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (1, 1, 0), LowerPad: 0, UpperPad: 0.
+
+
+11 out of 27 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+(nil): {[ce Gradient[1]] [conv1.b.b Gradient[1 x 1 x 16]] [conv1.c.c Gradient[28 x 28 x 16 x *1]] [conv1.cpb Gradient[28 x 28 x 16 x *1]] [conv1.out Gradient[28 x 28 x 16 x *1]] [conv1.w.W Gradient[16 x 25]] [conv2.b.b Gradient[1 x 1 x 32]] [conv2.c.c Gradient[14 x 14 x 32 x *1]] [conv2.cpb Gradient[14 x 14 x 32 x *1]] [conv2.out Gradient[14 x 14 x 32 x *1]] [conv2.w.W Gradient[32 x 400]] [err Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[28 x 28 x 1 x *1]] [features Gradient[28 x 28 x 1 x *1]] [h1.W Gradient[128 x 7 x 7 x 32]] [h1.b Gradient[128 x 1]] [h1.t Gradient[128 x *1]] [h1.y Gradient[128 x 1 x *1]] [h1.z Gradient[128 x 1 x *1]] [labels Gradient[10 x *1]] [ol.W Gradient[10 x 128]] [ol.b Gradient[10 x 1]] [ol.t Gradient[10 x 1 x *1]] [ol.z Gradient[10 x 1 x *1]] [pool1 Gradient[14 x 14 x 16 x *1]] [pool2.p Gradient[7 x 7 x 32 x *1]] }
+0x7f4274adf028: {[conv1.b.b Value[1 x 1 x 16]] }
+0x7f4274adfe98: {[h1.b Value[128 x 1]] }
+0x7f427ae42308: {[err Value[1]] }
+0x7f427ae62498: {[featScaled Value[28 x 28 x 1 x *1]] }
+0x7f427ae62748: {[conv1.c.c Value[28 x 28 x 16 x *1]] }
+0x7f427ae62c08: {[conv1.cpb Value[28 x 28 x 16 x *1]] }
+0x7f427ae62dc8: {[conv1.out Value[28 x 28 x 16 x *1]] }
+0x7f427ae62f88: {[pool1 Value[14 x 14 x 16 x *1]] }
+0x7f427ae63148: {[conv2.c.c Value[14 x 14 x 32 x *1]] }
+0x7f427ae634c8: {[conv2.cpb Value[14 x 14 x 32 x *1]] }
+0x7f427ae63688: {[conv2.out Value[14 x 14 x 32 x *1]] }
+0x7f427ae63848: {[pool2.p Value[7 x 7 x 32 x *1]] }
+0x7f427ae646a8: {[labels Value[10 x *1]] }
+0x7f427ae64b18: {[ol.b Value[10 x 1]] }
+0x7f427ae668a8: {[conv2.w.W Value[32 x 400]] }
+0x7f427ae72368: {[h1.W Value[128 x 7 x 7 x 32]] }
+0x7f427f20cb08: {[ol.W Value[10 x 128]] }
+0x7f427f20e888: {[featScale Value[1 x 1]] }
+0x7f427f20ea48: {[features Value[28 x 28 x 1 x *1]] }
+0x7f427f4d37a8: {[conv1.w.W Value[16 x 25]] }
+0x7f427f4d3968: {[conv2.b.b Value[1 x 1 x 32]] }
+0x7f427f4e2108: {[ce Value[1]] }
+0x7f427f4fcea8: {[h1.t Value[128 x *1]] }
+0x7f427f4fd068: {[h1.z Value[128 x 1 x *1]] }
+0x7f427f4fd228: {[h1.y Value[128 x 1 x *1]] }
+0x7f427f4fd3e8: {[ol.t Value[10 x 1 x *1]] }
+0x7f427f4fd5a8: {[ol.z Value[10 x 1 x *1]] }
+
+05/13/2016 15:10:28: Final Results: Minibatch[1-625]: err = 0.01460000 * 10000; ce = 0.04549626 * 10000; perplexity = 1.04654709
+
+05/13/2016 15:10:28: Action "test" complete.
+
+05/13/2016 15:10:28: __COMPLETED__
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/baseline.windows.txt
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/baseline.windows.txt
@ -0,0 +1,522 @@
+=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/02_Convolution.cntk currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu DeviceId=0 timestamping=true train=[SGD=[maxEpochs=3]] imageLayout="cudnn"
+-------------------------------------------------------------------
+Build info: 
+
+		Built time: May 13 2016 08:06:01
+		Last modified date: Thu May 12 07:31:50 2016
+		Build type: Release
+		Build target: GPU
+		With 1bit-SGD: no
+		CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
+		CUB_PATH: c:\src\cub-1.4.1
+		CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
+		Build Branch: HEAD
+		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+		Built by svcphil on Philly-Pool3
+		Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
+-------------------------------------------------------------------
+Changed current directory to C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData
+05/13/2016 08:16:16: -------------------------------------------------------------------
+05/13/2016 08:16:16: Build info: 
+
+05/13/2016 08:16:16: 		Built time: May 13 2016 08:06:01
+05/13/2016 08:16:16: 		Last modified date: Thu May 12 07:31:50 2016
+05/13/2016 08:16:16: 		Build type: Release
+05/13/2016 08:16:16: 		Build target: GPU
+05/13/2016 08:16:16: 		With 1bit-SGD: no
+05/13/2016 08:16:16: 		CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
+05/13/2016 08:16:16: 		CUB_PATH: c:\src\cub-1.4.1
+05/13/2016 08:16:16: 		CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
+05/13/2016 08:16:16: 		Build Branch: HEAD
+05/13/2016 08:16:16: 		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+05/13/2016 08:16:16: 		Built by svcphil on Philly-Pool3
+05/13/2016 08:16:16: 		Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
+05/13/2016 08:16:16: -------------------------------------------------------------------
+
+05/13/2016 08:16:16: Running on Philly-Pool2 at 2016/05/13 08:16:16
+05/13/2016 08:16:16: Command line: 
+C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe  configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/02_Convolution.cntk  currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData  RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu  DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData  ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config  OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu  DeviceId=0  timestamping=true  train=[SGD=[maxEpochs=3]]  imageLayout="cudnn"
+
+
+
+05/13/2016 08:16:16: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
+05/13/2016 08:16:16: RootDir = ".."
+ConfigDir = "$RootDir$/Config"
+DataDir   = "$RootDir$/Data"
+OutputDir = "$RootDir$/Output"
+ModelDir  = "$OutputDir$/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "$ModelDir$/02_Convolution"
+ndlMacros = "$ConfigDir$/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+prefetch=true
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "$ConfigDir$/02_Convolution.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1*5:0.3
+        momentumPerMB = 0*10:0.7
+        maxEpochs = 15
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+test = [
+    action = test
+    minibatchSize = 16
+    NDLNetworkBuilder = [
+        networkDescription = "$ConfigDir$/02_Convolution.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu
+DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData
+ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config
+OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 08:16:16: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED)  <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 08:16:16: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+05/13/2016 08:16:16: RootDir = ".."
+ConfigDir = "../Config"
+DataDir   = "../Data"
+OutputDir = "../Output"
+ModelDir  = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu/Models/02_Convolution"
+ndlMacros = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+prefetch=true
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/02_Convolution.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1*5:0.3
+        momentumPerMB = 0*10:0.7
+        maxEpochs = 15
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+test = [
+    action = test
+    minibatchSize = 16
+    NDLNetworkBuilder = [
+        networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/02_Convolution.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu
+DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData
+ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config
+OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 08:16:16: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 08:16:16: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+configparameters: 02_Convolution.cntk:command=train:test
+configparameters: 02_Convolution.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config
+configparameters: 02_Convolution.cntk:currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData
+configparameters: 02_Convolution.cntk:DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData
+configparameters: 02_Convolution.cntk:deviceId=0
+configparameters: 02_Convolution.cntk:imageLayout=cudnn
+configparameters: 02_Convolution.cntk:initOnCPUOnly=true
+configparameters: 02_Convolution.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu/Models
+configparameters: 02_Convolution.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu/Models/02_Convolution
+configparameters: 02_Convolution.cntk:ndlMacros=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/Macros.ndl
+configparameters: 02_Convolution.cntk:numMBsToShowResult=500
+configparameters: 02_Convolution.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu
+configparameters: 02_Convolution.cntk:precision=float
+configparameters: 02_Convolution.cntk:prefetch=true
+configparameters: 02_Convolution.cntk:RootDir=..
+configparameters: 02_Convolution.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu
+configparameters: 02_Convolution.cntk:test=[
+    action = test
+    minibatchSize = 16
+    NDLNetworkBuilder = [
+        networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/02_Convolution.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+
+configparameters: 02_Convolution.cntk:timestamping=true
+configparameters: 02_Convolution.cntk:traceLevel=1
+configparameters: 02_Convolution.cntk:train=[
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/02_Convolution.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.1*5:0.3
+        momentumPerMB = 0*10:0.7
+        maxEpochs = 15
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu\TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+] [SGD=[maxEpochs=3]]
+
+05/13/2016 08:16:16: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+05/13/2016 08:16:16: Commands: train test
+05/13/2016 08:16:16: Precision = "float"
+05/13/2016 08:16:16: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu/Models/02_Convolution
+05/13/2016 08:16:16: CNTKCommandTrainInfo: train : 3
+05/13/2016 08:16:16: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3
+
+05/13/2016 08:16:16: ##############################################################################
+05/13/2016 08:16:16: #                                                                            #
+05/13/2016 08:16:16: # Action "train"                                                             #
+05/13/2016 08:16:16: #                                                                            #
+05/13/2016 08:16:16: ##############################################################################
+
+05/13/2016 08:16:16: CNTKCommandTrainBegin: train
+NDLBuilder Using GPU 0
+
+05/13/2016 08:16:16: Creating virgin network.
+
+Post-processing network...
+
+3 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 27 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 128]
+Validating --> h1.W = LearnableParameter() :  -> [128 x 7 x 7 x 32]
+Validating --> conv2.w.W = LearnableParameter() :  -> [32 x 400]
+Validating --> conv1.w.W = LearnableParameter() :  -> [16 x 25]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [28 x 28 x 1 x *]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [28 x 28 x 1 x *] -> [28 x 28 x 1 x *]
+Validating --> conv1.c.c = Convolution (conv1.w.W, featScaled) : [16 x 25], [28 x 28 x 1 x *] -> [28 x 28 x 16 x *]
+Validating --> conv1.b.b = LearnableParameter() :  -> [1 x 1 x 16]
+Validating --> conv1.cpb = Plus (conv1.c.c, conv1.b.b) : [28 x 28 x 16 x *], [1 x 1 x 16] -> [28 x 28 x 16 x *]
+Validating --> conv1.out = RectifiedLinear (conv1.cpb) : [28 x 28 x 16 x *] -> [28 x 28 x 16 x *]
+Validating --> pool1 = MaxPooling (conv1.out) : [28 x 28 x 16 x *] -> [14 x 14 x 16 x *]
+Validating --> conv2.c.c = Convolution (conv2.w.W, pool1) : [32 x 400], [14 x 14 x 16 x *] -> [14 x 14 x 32 x *]
+Validating --> conv2.b.b = LearnableParameter() :  -> [1 x 1 x 32]
+Validating --> conv2.cpb = Plus (conv2.c.c, conv2.b.b) : [14 x 14 x 32 x *], [1 x 1 x 32] -> [14 x 14 x 32 x *]
+Validating --> conv2.out = RectifiedLinear (conv2.cpb) : [14 x 14 x 32 x *] -> [14 x 14 x 32 x *]
+Validating --> pool2.p = Pooling (conv2.out) : [14 x 14 x 32 x *] -> [7 x 7 x 32 x *]
+Validating --> h1.t = Times (h1.W, pool2.p) : [128 x 7 x 7 x 32], [7 x 7 x 32 x *] -> [128 x *]
+Validating --> h1.b = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.z = Plus (h1.t, h1.b) : [128 x *], [128 x 1] -> [128 x 1 x *]
+Validating --> h1.y = Sigmoid (h1.z) : [128 x 1 x *] -> [128 x 1 x *]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *] -> [10 x 1 x *]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+
+Validating network. 16 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 1, Output: 28 x 28 x 16, Kernel: 5 x 5 x 1, Map: 1 x 1 x 16, Stride: 1 x 1 x 1, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 16, Output: 14 x 14 x 16, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 16, Output: 14 x 14 x 32, Kernel: 5 x 5 x 16, Map: 32, Stride: 1 x 1 x 16, Sharing: (1, 1, 1), AutoPad: (1, 1, 0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 32, Output: 7 x 7 x 32, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (1, 1, 0), LowerPad: 0, UpperPad: 0.
+
+
+11 out of 27 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+05/13/2016 08:16:18: Created model with 27 nodes on GPU 0.
+
+05/13/2016 08:16:18: Training criterion node(s):
+05/13/2016 08:16:18: 	ce = CrossEntropyWithSoftmax
+
+05/13/2016 08:16:18: Evaluation criterion node(s):
+
+05/13/2016 08:16:18: 	err = ErrorPrediction
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+0000000000000000: {[err Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[28 x 28 x 1 x *]] [features Gradient[28 x 28 x 1 x *]] [labels Gradient[10 x *]] }
+000000CB919F83E0: {[features Value[28 x 28 x 1 x *]] }
+000000CBAA188380: {[h1.W Value[128 x 7 x 7 x 32]] }
+000000CBAA188560: {[ol.W Value[10 x 128]] }
+000000CBAA1887E0: {[ol.b Value[10 x 1]] }
+000000CBAA188A60: {[featScale Value[1 x 1]] }
+000000CBAA188CE0: {[conv1.w.W Value[16 x 25]] }
+000000CBAA1890A0: {[labels Value[10 x *]] }
+000000CBAA189320: {[conv2.b.b Value[1 x 1 x 32]] }
+000000CBAA1893C0: {[conv2.w.W Value[32 x 400]] }
+000000CBAA189C80: {[conv1.b.b Value[1 x 1 x 16]] }
+000000CBAA189DC0: {[h1.b Value[128 x 1]] }
+000000CBB0834910: {[ol.z Value[10 x 1 x *]] }
+000000CBB0834AF0: {[err Value[1]] }
+000000CBB0834B90: {[ol.t Gradient[10 x 1 x *]] }
+000000CBB0834F50: {[conv1.c.c Gradient[28 x 28 x 16 x *]] [conv1.out Value[28 x 28 x 16 x *]] }
+000000CBB0834FF0: {[conv1.c.c Value[28 x 28 x 16 x *]] }
+000000CBB08353B0: {[featScaled Value[28 x 28 x 1 x *]] }
+000000CBB0835770: {[ce Value[1]] }
+000000CBB0835950: {[conv2.c.c Value[14 x 14 x 32 x *]] }
+000000CBB0835B30: {[conv2.b.b Gradient[1 x 1 x 32]] [conv2.out Gradient[14 x 14 x 32 x *]] [h1.t Value[128 x *]] }
+000000CBB0835BD0: {[h1.W Gradient[128 x 7 x 7 x 32]] [h1.z Value[128 x 1 x *]] }
+000000CBB0835C70: {[h1.t Gradient[128 x *]] [h1.y Value[128 x 1 x *]] }
+000000CBB0835DB0: {[conv2.cpb Gradient[14 x 14 x 32 x *]] [pool1 Gradient[14 x 14 x 16 x *]] [pool2.p Value[7 x 7 x 32 x *]] }
+000000CBB0835F90: {[ce Gradient[1]] }
+000000CBB0836350: {[conv1.cpb Value[28 x 28 x 16 x *]] [conv1.w.W Gradient[16 x 25]] }
+000000CBB08363F0: {[conv1.b.b Gradient[1 x 1 x 16]] [conv1.out Gradient[28 x 28 x 16 x *]] }
+000000CBB0836490: {[h1.z Gradient[128 x 1 x *]] [ol.t Value[10 x 1 x *]] [pool2.p Gradient[7 x 7 x 32 x *]] }
+000000CBB0836670: {[ol.b Gradient[10 x 1]] }
+000000CBB0836990: {[conv2.c.c Gradient[14 x 14 x 32 x *]] [conv2.out Value[14 x 14 x 32 x *]] }
+000000CBB0836A30: {[ol.W Gradient[10 x 128]] [ol.z Gradient[10 x 1 x *]] }
+000000CBB0836B70: {[conv2.cpb Value[14 x 14 x 32 x *]] [conv2.w.W Gradient[32 x 400]] }
+000000CBB0836CB0: {[h1.b Gradient[128 x 1]] [h1.y Gradient[128 x 1 x *]] }
+000000CBB0836E90: {[conv1.cpb Gradient[28 x 28 x 16 x *]] [pool1 Value[14 x 14 x 16 x *]] }
+
+05/13/2016 08:16:18: No PreCompute nodes found, skipping PreCompute step.
+
+05/13/2016 08:16:18: Starting Epoch 1: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 08:16:18: Starting minibatch loop.
+05/13/2016 08:16:22:  Epoch[ 1 of 3]-Minibatch[   1- 500, 26.67%]: ce = 1.52245886 * 16000; err = 0.53531250 * 16000; time = 4.2213s; samplesPerSecond = 3790.3
+05/13/2016 08:16:24:  Epoch[ 1 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.20213049 * 16000; err = 0.05737500 * 16000; time = 1.6650s; samplesPerSecond = 9609.8
+05/13/2016 08:16:26:  Epoch[ 1 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.11822998 * 16000; err = 0.03400000 * 16000; time = 1.6662s; samplesPerSecond = 9602.5
+05/13/2016 08:16:27: Finished Epoch[ 1 of 3]: [Training] ce = 0.51029333 * 60000; err = 0.17250000 * 60000; totalSamplesSeen = 60000; learningRatePerSample = 0.003125; epochTime=8.83729s
+05/13/2016 08:16:27: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu/Models/02_Convolution.1'
+
+05/13/2016 08:16:27: Starting Epoch 2: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 08:16:27: Starting minibatch loop.
+05/13/2016 08:16:29:  Epoch[ 2 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.07765988 * 16000; err = 0.02281250 * 16000; time = 1.6655s; samplesPerSecond = 9606.6
+05/13/2016 08:16:30:  Epoch[ 2 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.06650398 * 16000; err = 0.01943750 * 16000; time = 1.6661s; samplesPerSecond = 9603.4
+05/13/2016 08:16:32:  Epoch[ 2 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.06597272 * 16000; err = 0.02025000 * 16000; time = 1.6655s; samplesPerSecond = 9607.0
+05/13/2016 08:16:33: Finished Epoch[ 2 of 3]: [Training] ce = 0.06707618 * 60000; err = 0.01993333 * 60000; totalSamplesSeen = 120000; learningRatePerSample = 0.003125; epochTime=6.26303s
+05/13/2016 08:16:33: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu/Models/02_Convolution.2'
+
+05/13/2016 08:16:33: Starting Epoch 3: learning rate per sample = 0.003125  effective momentum = 0.000000  momentum as time constant = 0.0 samples
+
+05/13/2016 08:16:33: Starting minibatch loop.
+05/13/2016 08:16:35:  Epoch[ 3 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.04900096 * 16000; err = 0.01531250 * 16000; time = 1.6660s; samplesPerSecond = 9603.7
+05/13/2016 08:16:37:  Epoch[ 3 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.04317124 * 16000; err = 0.01300000 * 16000; time = 1.6655s; samplesPerSecond = 9606.5
+05/13/2016 08:16:38:  Epoch[ 3 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.04517576 * 16000; err = 0.01293750 * 16000; time = 1.6628s; samplesPerSecond = 9622.2
+05/13/2016 08:16:40: Finished Epoch[ 3 of 3]: [Training] ce = 0.04463579 * 60000; err = 0.01335000 * 60000; totalSamplesSeen = 180000; learningRatePerSample = 0.003125; epochTime=6.25721s
+05/13/2016 08:16:40: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_02_Convolution@release_gpu/Models/02_Convolution'
+05/13/2016 08:16:40: CNTKCommandTrainEnd: train
+
+05/13/2016 08:16:40: Action "train" complete.
+
+
+05/13/2016 08:16:40: ##############################################################################
+05/13/2016 08:16:40: #                                                                            #
+05/13/2016 08:16:40: # Action "test"                                                              #
+05/13/2016 08:16:40: #                                                                            #
+05/13/2016 08:16:40: ##############################################################################
+
+
+Post-processing network...
+
+3 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 27 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *1]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 128]
+Validating --> h1.W = LearnableParameter() :  -> [128 x 7 x 7 x 32]
+Validating --> conv2.w.W = LearnableParameter() :  -> [32 x 400]
+Validating --> conv1.w.W = LearnableParameter() :  -> [16 x 25]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [28 x 28 x 1 x *1]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [28 x 28 x 1 x *1] -> [28 x 28 x 1 x *1]
+Validating --> conv1.c.c = Convolution (conv1.w.W, featScaled) : [16 x 25], [28 x 28 x 1 x *1] -> [28 x 28 x 16 x *1]
+Validating --> conv1.b.b = LearnableParameter() :  -> [1 x 1 x 16]
+Validating --> conv1.cpb = Plus (conv1.c.c, conv1.b.b) : [28 x 28 x 16 x *1], [1 x 1 x 16] -> [28 x 28 x 16 x *1]
+Validating --> conv1.out = RectifiedLinear (conv1.cpb) : [28 x 28 x 16 x *1] -> [28 x 28 x 16 x *1]
+Validating --> pool1 = MaxPooling (conv1.out) : [28 x 28 x 16 x *1] -> [14 x 14 x 16 x *1]
+Validating --> conv2.c.c = Convolution (conv2.w.W, pool1) : [32 x 400], [14 x 14 x 16 x *1] -> [14 x 14 x 32 x *1]
+Validating --> conv2.b.b = LearnableParameter() :  -> [1 x 1 x 32]
+Validating --> conv2.cpb = Plus (conv2.c.c, conv2.b.b) : [14 x 14 x 32 x *1], [1 x 1 x 32] -> [14 x 14 x 32 x *1]
+Validating --> conv2.out = RectifiedLinear (conv2.cpb) : [14 x 14 x 32 x *1] -> [14 x 14 x 32 x *1]
+Validating --> pool2.p = Pooling (conv2.out) : [14 x 14 x 32 x *1] -> [7 x 7 x 32 x *1]
+Validating --> h1.t = Times (h1.W, pool2.p) : [128 x 7 x 7 x 32], [7 x 7 x 32 x *1] -> [128 x *1]
+Validating --> h1.b = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.z = Plus (h1.t, h1.b) : [128 x *1], [128 x 1] -> [128 x 1 x *1]
+Validating --> h1.y = Sigmoid (h1.z) : [128 x 1 x *1] -> [128 x 1 x *1]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *1] -> [10 x 1 x *1]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+
+Validating network. 16 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 1, Output: 28 x 28 x 16, Kernel: 5 x 5 x 1, Map: 1 x 1 x 16, Stride: 1 x 1 x 1, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 16, Output: 14 x 14 x 16, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 16, Output: 14 x 14 x 32, Kernel: 5 x 5 x 16, Map: 32, Stride: 1 x 1 x 16, Sharing: (1, 1, 1), AutoPad: (1, 1, 0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 32, Output: 7 x 7 x 32, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (1, 1, 0), LowerPad: 0, UpperPad: 0.
+
+
+11 out of 27 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+0000000000000000: {[ce Gradient[1]] [conv1.b.b Gradient[1 x 1 x 16]] [conv1.c.c Gradient[28 x 28 x 16 x *1]] [conv1.cpb Gradient[28 x 28 x 16 x *1]] [conv1.out Gradient[28 x 28 x 16 x *1]] [conv1.w.W Gradient[16 x 25]] [conv2.b.b Gradient[1 x 1 x 32]] [conv2.c.c Gradient[14 x 14 x 32 x *1]] [conv2.cpb Gradient[14 x 14 x 32 x *1]] [conv2.out Gradient[14 x 14 x 32 x *1]] [conv2.w.W Gradient[32 x 400]] [err Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[28 x 28 x 1 x *1]] [features Gradient[28 x 28 x 1 x *1]] [h1.W Gradient[128 x 7 x 7 x 32]] [h1.b Gradient[128 x 1]] [h1.t Gradient[128 x *1]] [h1.y Gradient[128 x 1 x *1]] [h1.z Gradient[128 x 1 x *1]] [labels Gradient[10 x *1]] [ol.W Gradient[10 x 128]] [ol.b Gradient[10 x 1]] [ol.t Gradient[10 x 1 x *1]] [ol.z Gradient[10 x 1 x *1]] [pool1 Gradient[14 x 14 x 16 x *1]] [pool2.p Gradient[7 x 7 x 32 x *1]] }
+000000CBAA188420: {[conv2.cpb Value[14 x 14 x 32 x *1]] }
+000000CBAA188BA0: {[pool2.p Value[7 x 7 x 32 x *1]] }
+000000CBAA1890A0: {[conv1.out Value[28 x 28 x 16 x *1]] }
+000000CBAA189140: {[conv2.c.c Value[14 x 14 x 32 x *1]] }
+000000CBAA1891E0: {[h1.t Value[128 x *1]] }
+000000CBAA189320: {[h1.z Value[128 x 1 x *1]] }
+000000CBAA1895A0: {[ol.t Value[10 x 1 x *1]] }
+000000CBAA189780: {[pool1 Value[14 x 14 x 16 x *1]] }
+000000CBAA189820: {[ol.z Value[10 x 1 x *1]] }
+000000CBAA189DC0: {[h1.y Value[128 x 1 x *1]] }
+000000CBAA18A0E0: {[conv2.out Value[14 x 14 x 32 x *1]] }
+000000CBB0834AF0: {[ol.W Value[10 x 128]] }
+000000CBB0834C30: {[h1.b Value[128 x 1]] }
+000000CBB0834FF0: {[features Value[28 x 28 x 1 x *1]] }
+000000CBB0835770: {[h1.W Value[128 x 7 x 7 x 32]] }
+000000CBB08358B0: {[featScale Value[1 x 1]] }
+000000CBB0835BD0: {[conv1.w.W Value[16 x 25]] }
+000000CBB08360D0: {[labels Value[10 x *1]] }
+000000CBB0836350: {[ol.b Value[10 x 1]] }
+000000CBB0836490: {[conv2.b.b Value[1 x 1 x 32]] }
+000000CBB0836A30: {[conv2.w.W Value[32 x 400]] }
+000000CBB0836CB0: {[conv1.b.b Value[1 x 1 x 16]] }
+000000CBB08371B0: {[err Value[1]] }
+000000CBB08372F0: {[conv1.cpb Value[28 x 28 x 16 x *1]] }
+000000CBB0837F70: {[featScaled Value[28 x 28 x 1 x *1]] }
+000000CBB08381F0: {[ce Value[1]] }
+000000CBB08383D0: {[conv1.c.c Value[28 x 28 x 16 x *1]] }
+
+05/13/2016 08:16:51: Final Results: Minibatch[1-625]: err = 0.01520000 * 10000; ce = 0.04488435 * 10000; perplexity = 1.04590689
+
+05/13/2016 08:16:51: Action "test" complete.
+
+05/13/2016 08:16:51: __COMPLETED__
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/run-test
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/run-test
@ -0,0 +1,11 @@
+#!/bin/bash
+# End-to-end test driver: runs 02_Convolution.cntk with the train section's maxEpochs overridden to 3.
+. "$TEST_DIR/../run-test-common"
+# cntkrun and the variables used below are not defined in this script; presumably run-test-common provides them.
+cntkrun  "$CNTKTextConfigDir/02_Convolution.cntk"  "train=[SGD=[maxEpochs=3]] imageLayout=\"$imageLayout\""
+ExitCode=$?
+
+# Delete the test data if copied (cntkrun's status was saved above, so cleanup cannot overwrite it)
+[[ "$Copied" -eq "1" ]] && rm -rf "$DataDir"
+
+exit $ExitCode
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/testcases.yml
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/02_Convolution/testcases.yml
@ -0,0 +1,41 @@
+dataDir: ../../../../../../../Examples/Image/MNIST/Data
+
+tags:
+    # In BVT, run Release GPU
+    - bvt-e (build_sku=='gpu') and (device=='gpu') and (flavor=='release')
+    # In Nightly on Linux, run Debug GPU in addition.
+    - nightly-e (build_sku=='gpu') and (device=='gpu')
+
+testCases:
+  CNTK Run must be completed:
+    patterns:
+      - __COMPLETED__
+
+  Must train epochs in exactly same order and parameters:
+    patterns:
+      - Starting Epoch {{integer}}
+      - learning rate per sample = {{float}}
+      - momentum = {{float}}
+
+# TODO destabilized by integration of 16b606c (VSO item #414)
+#  Epochs (with low train loss) must be finished with expected results:
+#    patterns:
+#      - Finished Epoch[{{integer}} of {{integer}}]
+#      - ce = {{float,tolerance=0.05}} * {{integer}}
+#      - err = {{float,tolerance=0.005}} * {{integer}}
+#      - totalSamplesSeen = {{integer}}
+#      - learningRatePerSample = {{float,tolerance=0.1%}}
+
+  Per-minibatch (with low train loss) training results must match:
+    patterns:
+      # Ignores first set of minibatches at start of epoch, which sometimes has a larger deviation:
+      - 01-
+      - Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}}
+      - ce = {{float,tolerance=0.05}} * {{integer}}
+      - err = {{float,tolerance=0.005}} * {{integer}}
+
+  Final test results must match:
+    patterns:
+      - "Final Results: Minibatch[{{integer}}-{{integer}}]"
+      - err = {{float,tolerance=0.005}} * {{integer}}
+      - ce = {{float,tolerance=0.05}} * {{integer}}
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/README.txt
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/README.txt
@ -0,0 +1,9 @@
+Test runtimes
+
+Windows:
+Running test CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm (debug gpu) - [OK] 214.39 sec
+Running test CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm (release gpu) - [OK] 64.81 sec
+
+Linux:
+Running test CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm (debug gpu) - [OK] 44.82 sec
+Running test CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm (release gpu) - [OK] 19.90 sec
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/baseline.linux.txt
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/baseline.linux.txt
@ -0,0 +1,581 @@
+=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/03_ConvBatchNorm.cntk currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu DeviceId=0 timestamping=true train=[SGD=[maxEpochs=3]] imageLayout="cudnn"
+-------------------------------------------------------------------
+Build info: 
+
+		Built time: May 13 2016 14:50:25
+		Last modified date: Thu May 12 14:00:37 2016
+		Build type: release
+		Build target: GPU
+		With 1bit-SGD: no
+		Math lib: acml
+		CUDA_PATH: /usr/local/cuda-7.5
+		CUB_PATH: /usr/local/cub-1.4.1
+		CUDNN_PATH: /usr/local/cudnn-4.0
+		Build Branch: HEAD
+		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+		Built by philly on d8dc82703b0f
+		Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
+-------------------------------------------------------------------
+Changed current directory to /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData
+05/13/2016 15:10:29: -------------------------------------------------------------------
+05/13/2016 15:10:29: Build info: 
+
+05/13/2016 15:10:29: 		Built time: May 13 2016 14:50:25
+05/13/2016 15:10:29: 		Last modified date: Thu May 12 14:00:37 2016
+05/13/2016 15:10:29: 		Build type: release
+05/13/2016 15:10:29: 		Build target: GPU
+05/13/2016 15:10:29: 		With 1bit-SGD: no
+05/13/2016 15:10:29: 		Math lib: acml
+05/13/2016 15:10:29: 		CUDA_PATH: /usr/local/cuda-7.5
+05/13/2016 15:10:29: 		CUB_PATH: /usr/local/cub-1.4.1
+05/13/2016 15:10:29: 		CUDNN_PATH: /usr/local/cudnn-4.0
+05/13/2016 15:10:29: 		Build Branch: HEAD
+05/13/2016 15:10:29: 		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+05/13/2016 15:10:29: 		Built by philly on d8dc82703b0f
+05/13/2016 15:10:29: 		Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
+05/13/2016 15:10:29: -------------------------------------------------------------------
+
+05/13/2016 15:10:29: Running on localhost at 2016/05/13 15:10:29
+05/13/2016 15:10:29: Command line: 
+/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk  configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/03_ConvBatchNorm.cntk  currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData  RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu  DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData  ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config  OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu  DeviceId=0  timestamping=true  train=[SGD=[maxEpochs=3]]  imageLayout="cudnn"
+
+
+
+05/13/2016 15:10:29: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
+05/13/2016 15:10:29: RootDir = ".."
+ConfigDir = "$RootDir$/Config"
+DataDir   = "$RootDir$/Data"
+OutputDir = "$RootDir$/Output"
+ModelDir  = "$OutputDir$/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "$ModelDir$/03_ConvBatchNorm"
+ndlMacros = "$ConfigDir$/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "$ConfigDir$/03_ConvBatchNorm.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.5:0.1
+        momentumPerMB = 0.9
+        maxEpochs = 2
+        batchNormalizationBlendTimeConstant=0:1#INF
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+test = [
+    action = "test"
+    minibatchSize = 32
+    modelPath=$ModelDir$/03_ConvBatchNorm
+    NDLNetworkBuilder = [
+        networkDescription = "$ConfigDir$/03_ConvBatchNorm.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData
+RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu
+DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData
+ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config
+OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 15:10:29: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED)  <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 15:10:29: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+05/13/2016 15:10:29: RootDir = ".."
+ConfigDir = "../Config"
+DataDir   = "../Data"
+OutputDir = "../Output"
+ModelDir  = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm"
+ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config/03_ConvBatchNorm.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.5:0.1
+        momentumPerMB = 0.9
+        maxEpochs = 2
+        batchNormalizationBlendTimeConstant=0:1#INF
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+test = [
+    action = "test"
+    minibatchSize = 32
+    modelPath=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm
+    NDLNetworkBuilder = [
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config/03_ConvBatchNorm.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData
+RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu
+DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData
+ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config
+OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 15:10:29: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 15:10:29: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+configparameters: 03_ConvBatchNorm.cntk:command=train:test
+configparameters: 03_ConvBatchNorm.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config
+configparameters: 03_ConvBatchNorm.cntk:currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData
+configparameters: 03_ConvBatchNorm.cntk:DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData
+configparameters: 03_ConvBatchNorm.cntk:deviceId=0
+configparameters: 03_ConvBatchNorm.cntk:imageLayout=cudnn
+configparameters: 03_ConvBatchNorm.cntk:initOnCPUOnly=true
+configparameters: 03_ConvBatchNorm.cntk:ModelDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/Models
+configparameters: 03_ConvBatchNorm.cntk:modelPath=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm
+configparameters: 03_ConvBatchNorm.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config/Macros.ndl
+configparameters: 03_ConvBatchNorm.cntk:numMBsToShowResult=500
+configparameters: 03_ConvBatchNorm.cntk:OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu
+configparameters: 03_ConvBatchNorm.cntk:precision=float
+configparameters: 03_ConvBatchNorm.cntk:RootDir=..
+configparameters: 03_ConvBatchNorm.cntk:RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu
+configparameters: 03_ConvBatchNorm.cntk:test=[
+    action = "test"
+    minibatchSize = 32
+    modelPath=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm
+    NDLNetworkBuilder = [
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config/03_ConvBatchNorm.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+
+configparameters: 03_ConvBatchNorm.cntk:timestamping=true
+configparameters: 03_ConvBatchNorm.cntk:traceLevel=1
+configparameters: 03_ConvBatchNorm.cntk:train=[
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/../../../../../../../Examples/Image/MNIST/Config/03_ConvBatchNorm.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.5:0.1
+        momentumPerMB = 0.9
+        maxEpochs = 2
+        batchNormalizationBlendTimeConstant=0:1#INF
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+] [SGD=[maxEpochs=3]]
+
+05/13/2016 15:10:29: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+05/13/2016 15:10:29: Commands: train test
+05/13/2016 15:10:29: Precision = "float"
+05/13/2016 15:10:29: CNTKModelPath: /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm
+05/13/2016 15:10:29: CNTKCommandTrainInfo: train : 3
+05/13/2016 15:10:29: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3
+
+05/13/2016 15:10:29: ##############################################################################
+05/13/2016 15:10:29: #                                                                            #
+05/13/2016 15:10:29: # Action "train"                                                             #
+05/13/2016 15:10:29: #                                                                            #
+05/13/2016 15:10:29: ##############################################################################
+
+05/13/2016 15:10:29: CNTKCommandTrainBegin: train
+NDLBuilder Using GPU 0
+
+05/13/2016 15:10:29: Creating virgin network.
+
+Post-processing network...
+
+3 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 36 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 128]
+Validating --> h1.W = LearnableParameter() :  -> [128 x 1568]
+Validating --> conv2.c.W = LearnableParameter() :  -> [32 x 400]
+Validating --> conv1.c.W = LearnableParameter() :  -> [16 x 25]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [28 x 28 x 1 x *]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [28 x 28 x 1 x *] -> [28 x 28 x 1 x *]
+Validating --> conv1.c.c.c = Convolution (conv1.c.W, featScaled) : [16 x 25], [28 x 28 x 1 x *] -> [28 x 28 x 16 x *]
+Validating --> conv1.c.c.sc = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.b = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.m = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.isd = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.y = BatchNormalization (conv1.c.c.c, conv1.c.c.sc, conv1.c.c.b, conv1.c.c.m, conv1.c.c.isd) : [28 x 28 x 16 x *], [16 x 1], [16 x 1], [16 x 1], [16 x 1] -> [28 x 28 x 16 x *]
+Validating --> conv1.y = RectifiedLinear (conv1.c.c.y) : [28 x 28 x 16 x *] -> [28 x 28 x 16 x *]
+Validating --> pool1 = MaxPooling (conv1.y) : [28 x 28 x 16 x *] -> [14 x 14 x 16 x *]
+Validating --> conv2.c.c.c = Convolution (conv2.c.W, pool1) : [32 x 400], [14 x 14 x 16 x *] -> [14 x 14 x 32 x *]
+Validating --> conv2.c.c.sc = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.b = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.m = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.isd = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.y = BatchNormalization (conv2.c.c.c, conv2.c.c.sc, conv2.c.c.b, conv2.c.c.m, conv2.c.c.isd) : [14 x 14 x 32 x *], [32 x 1], [32 x 1], [32 x 1], [32 x 1] -> [14 x 14 x 32 x *]
+Validating --> conv2.y = RectifiedLinear (conv2.c.c.y) : [14 x 14 x 32 x *] -> [14 x 14 x 32 x *]
+Validating --> pool2 = MaxPooling (conv2.y) : [14 x 14 x 32 x *] -> [7 x 7 x 32 x *]
+
+h1.t Times operation: For legacy compatibility, the sample layout of left input (h1.W LearnableParameter operation) was patched to [128 x 7 x 7 x 32] (from [128 x 1568])
+Validating --> h1.t = Times (h1.W, pool2) : [128 x 7 x 7 x 32], [7 x 7 x 32 x *] -> [128 x *]
+Validating --> h1.sc = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.b = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.m = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.isd = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.bn = BatchNormalization (h1.t, h1.sc, h1.b, h1.m, h1.isd) : [128 x *], [128 x 1], [128 x 1], [128 x 1], [128 x 1] -> [128 x *]
+Validating --> h1.y = RectifiedLinear (h1.bn) : [128 x *] -> [128 x *]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *] -> [10 x *]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *], [10 x 1] -> [10 x 1 x *]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+
+Validating network. 16 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 1, Output: 28 x 28 x 16, Kernel: 5 x 5 x 1, Map: 1 x 1 x 16, Stride: 1 x 1 x 1, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 16, Output: 14 x 14 x 16, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 16, Output: 14 x 14 x 32, Kernel: 5 x 5 x 16, Map: 1 x 1 x 32, Stride: 1 x 1 x 16, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 32, Output: 7 x 7 x 32, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+
+20 out of 36 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+05/13/2016 15:10:29: Created model with 36 nodes on GPU 0.
+
+05/13/2016 15:10:29: Training criterion node(s):
+05/13/2016 15:10:29: 	ce = CrossEntropyWithSoftmax
+
+05/13/2016 15:10:29: Evaluation criterion node(s):
+
+05/13/2016 15:10:29: 	err = ErrorPrediction
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+(nil): {[conv1.c.c.isd Gradient[16 x 1]] [conv1.c.c.m Gradient[16 x 1]] [conv2.c.c.isd Gradient[32 x 1]] [conv2.c.c.m Gradient[32 x 1]] [err Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[28 x 28 x 1 x *]] [features Gradient[28 x 28 x 1 x *]] [h1.isd Gradient[128 x 1]] [h1.m Gradient[128 x 1]] [labels Gradient[10 x *]] }
+0x2643328: {[features Value[28 x 28 x 1 x *]] }
+0x33a9468: {[featScale Value[1 x 1]] }
+0x33aa5e8: {[labels Value[10 x *]] }
+0x33ab128: {[conv1.c.W Value[16 x 25]] }
+0x33ab818: {[conv1.c.c.b Value[16 x 1]] }
+0x33ac238: {[conv1.c.c.sc Value[16 x 1]] }
+0x33ad108: {[conv1.c.c.m Value[16 x 1]] }
+0x33adbf8: {[conv1.c.c.isd Value[16 x 1]] }
+0x33af968: {[conv2.c.W Value[32 x 400]] }
+0x33b0878: {[conv2.c.c.b Value[32 x 1]] }
+0x33b1258: {[conv2.c.c.sc Value[32 x 1]] }
+0x33b1908: {[ol.b Value[10 x 1]] }
+0x33b1e78: {[conv2.c.c.m Value[32 x 1]] }
+0x33b29c8: {[conv2.c.c.isd Value[32 x 1]] }
+0x33b3968: {[h1.W Value[128 x 7 x 7 x 32]] }
+0x33b5408: {[h1.b Value[128 x 1]] }
+0x33b5e38: {[h1.sc Value[128 x 1]] }
+0x33b6738: {[h1.m Value[128 x 1]] }
+0x33b70b8: {[h1.isd Value[128 x 1]] }
+0x33b7618: {[ol.W Value[10 x 128]] }
+0x33be778: {[ce Value[1]] }
+0x33bfba8: {[ol.z Value[10 x 1 x *]] }
+0x33ff558: {[err Value[1]] }
+0x788fe48: {[conv1.c.c.c Value[28 x 28 x 16 x *]] }
+0x7890188: {[featScaled Value[28 x 28 x 1 x *]] }
+0x7890438: {[conv1.c.c.y Value[28 x 28 x 16 x *]] }
+0x7891238: {[conv1.c.c.c Gradient[28 x 28 x 16 x *]] [conv1.y Value[28 x 28 x 16 x *]] }
+0x78913f8: {[conv1.c.c.y Gradient[28 x 28 x 16 x *]] [pool1 Value[14 x 14 x 16 x *]] }
+0x78915b8: {[conv1.c.W Gradient[16 x 25]] [conv2.c.c.c Value[14 x 14 x 32 x *]] }
+0x7891778: {[conv1.c.c.sc Gradient[16 x 1]] [conv1.y Gradient[28 x 28 x 16 x *]] }
+0x7891938: {[conv2.c.c.y Value[14 x 14 x 32 x *]] }
+0x7891e78: {[conv1.c.c.b Gradient[16 x 1]] [conv2.c.c.c Gradient[14 x 14 x 32 x *]] [conv2.y Value[14 x 14 x 32 x *]] }
+0x7892038: {[conv2.c.c.y Gradient[14 x 14 x 32 x *]] [pool2 Value[7 x 7 x 32 x *]] }
+0x78921f8: {[conv2.c.c.sc Gradient[32 x 1]] [conv2.y Gradient[14 x 14 x 32 x *]] [h1.t Value[128 x *]] }
+0x78923b8: {[h1.bn Value[128 x *]] }
+0x7892738: {[conv2.c.c.b Gradient[32 x 1]] }
+0x78928f8: {[conv2.c.W Gradient[32 x 400]] [h1.t Gradient[128 x *]] [h1.y Value[128 x *]] }
+0x7892ab8: {[h1.bn Gradient[128 x *]] [ol.t Value[10 x *]] }
+0x78999e8: {[ce Gradient[1]] }
+0x7899ba8: {[ol.W Gradient[10 x 128]] [ol.z Gradient[10 x 1 x *]] }
+0x7899d68: {[ol.t Gradient[10 x *]] [pool1 Gradient[14 x 14 x 16 x *]] [pool2 Gradient[7 x 7 x 32 x *]] }
+0x7899f28: {[ol.b Gradient[10 x 1]] }
+0x789a0e8: {[h1.sc Gradient[128 x 1]] [h1.y Gradient[128 x *]] }
+0x789a2d8: {[h1.W Gradient[128 x 7 x 7 x 32]] }
+0x789a498: {[h1.b Gradient[128 x 1]] }
+
+05/13/2016 15:10:29: No PreCompute nodes found, skipping PreCompute step.
+
+05/13/2016 15:10:29: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 303.7 samples
+
+05/13/2016 15:10:29: Starting minibatch loop.
+05/13/2016 15:10:31:  Epoch[ 1 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.18369328 * 16000; err = 0.05750000 * 16000; time = 2.0641s; samplesPerSecond = 7751.5
+05/13/2016 15:10:32:  Epoch[ 1 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.08101009 * 16000; err = 0.02425000 * 16000; time = 1.0283s; samplesPerSecond = 15560.4
+05/13/2016 15:10:33:  Epoch[ 1 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.06876743 * 16000; err = 0.02125000 * 16000; time = 1.0403s; samplesPerSecond = 15380.9
+05/13/2016 15:10:34: Finished Epoch[ 1 of 3]: [Training] ce = 0.09983698 * 60000; err = 0.03098333 * 60000; totalSamplesSeen = 60000; learningRatePerSample = 0.015625; epochTime=4.9337s
+05/13/2016 15:10:34: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm.1'
+Setting batch normalization blend time constant to inf.
+
+05/13/2016 15:10:34: Starting Epoch 2: learning rate per sample = 0.003125  effective momentum = 0.900000  momentum as time constant = 303.7 samples
+
+05/13/2016 15:10:34: Starting minibatch loop.
+05/13/2016 15:10:35:  Epoch[ 2 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.02224222 * 16000; err = 0.00756250 * 16000; time = 1.0463s; samplesPerSecond = 15292.5
+05/13/2016 15:10:36:  Epoch[ 2 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.01788928 * 16000; err = 0.00568750 * 16000; time = 1.0489s; samplesPerSecond = 15254.3
+05/13/2016 15:10:37:  Epoch[ 2 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.01989119 * 16000; err = 0.00543750 * 16000; time = 1.0414s; samplesPerSecond = 15363.9
+05/13/2016 15:10:38: Finished Epoch[ 2 of 3]: [Training] ce = 0.02009503 * 60000; err = 0.00623333 * 60000; totalSamplesSeen = 120000; learningRatePerSample = 0.003125; epochTime=3.92922s
+05/13/2016 15:10:38: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm.2'
+
+05/13/2016 15:10:38: Starting Epoch 3: learning rate per sample = 0.003125  effective momentum = 0.900000  momentum as time constant = 303.7 samples
+
+05/13/2016 15:10:38: Starting minibatch loop.
+05/13/2016 15:10:39:  Epoch[ 3 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.01173781 * 16000; err = 0.00306250 * 16000; time = 1.0390s; samplesPerSecond = 15400.0
+05/13/2016 15:10:40:  Epoch[ 3 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.01463517 * 16000; err = 0.00431250 * 16000; time = 1.0397s; samplesPerSecond = 15388.4
+05/13/2016 15:10:41:  Epoch[ 3 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.01582476 * 16000; err = 0.00493750 * 16000; time = 1.0373s; samplesPerSecond = 15425.2
+05/13/2016 15:10:42: Finished Epoch[ 3 of 3]: [Training] ce = 0.01382984 * 60000; err = 0.00401667 * 60000; totalSamplesSeen = 180000; learningRatePerSample = 0.003125; epochTime=3.9054s
+05/13/2016 15:10:42: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm'
+05/13/2016 15:10:42: CNTKCommandTrainEnd: train
+
+05/13/2016 15:10:42: Action "train" complete.
+
+
+05/13/2016 15:10:42: ##############################################################################
+05/13/2016 15:10:42: #                                                                            #
+05/13/2016 15:10:42: # Action "test"                                                              #
+05/13/2016 15:10:42: #                                                                            #
+05/13/2016 15:10:42: ##############################################################################
+
+
+Post-processing network...
+
+3 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 36 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *1]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 128]
+Validating --> h1.W = LearnableParameter() :  -> [128 x 7 x 7 x 32]
+Validating --> conv2.c.W = LearnableParameter() :  -> [32 x 400]
+Validating --> conv1.c.W = LearnableParameter() :  -> [16 x 25]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [28 x 28 x 1 x *1]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [28 x 28 x 1 x *1] -> [28 x 28 x 1 x *1]
+Validating --> conv1.c.c.c = Convolution (conv1.c.W, featScaled) : [16 x 25], [28 x 28 x 1 x *1] -> [28 x 28 x 16 x *1]
+Validating --> conv1.c.c.sc = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.b = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.m = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.isd = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.y = BatchNormalization (conv1.c.c.c, conv1.c.c.sc, conv1.c.c.b, conv1.c.c.m, conv1.c.c.isd) : [28 x 28 x 16 x *1], [16 x 1], [16 x 1], [16 x 1], [16 x 1] -> [28 x 28 x 16 x *1]
+Validating --> conv1.y = RectifiedLinear (conv1.c.c.y) : [28 x 28 x 16 x *1] -> [28 x 28 x 16 x *1]
+Validating --> pool1 = MaxPooling (conv1.y) : [28 x 28 x 16 x *1] -> [14 x 14 x 16 x *1]
+Validating --> conv2.c.c.c = Convolution (conv2.c.W, pool1) : [32 x 400], [14 x 14 x 16 x *1] -> [14 x 14 x 32 x *1]
+Validating --> conv2.c.c.sc = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.b = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.m = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.isd = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.y = BatchNormalization (conv2.c.c.c, conv2.c.c.sc, conv2.c.c.b, conv2.c.c.m, conv2.c.c.isd) : [14 x 14 x 32 x *1], [32 x 1], [32 x 1], [32 x 1], [32 x 1] -> [14 x 14 x 32 x *1]
+Validating --> conv2.y = RectifiedLinear (conv2.c.c.y) : [14 x 14 x 32 x *1] -> [14 x 14 x 32 x *1]
+Validating --> pool2 = MaxPooling (conv2.y) : [14 x 14 x 32 x *1] -> [7 x 7 x 32 x *1]
+Validating --> h1.t = Times (h1.W, pool2) : [128 x 7 x 7 x 32], [7 x 7 x 32 x *1] -> [128 x *1]
+Validating --> h1.sc = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.b = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.m = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.isd = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.bn = BatchNormalization (h1.t, h1.sc, h1.b, h1.m, h1.isd) : [128 x *1], [128 x 1], [128 x 1], [128 x 1], [128 x 1] -> [128 x *1]
+Validating --> h1.y = RectifiedLinear (h1.bn) : [128 x *1] -> [128 x *1]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *1] -> [10 x *1]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *1], [10 x 1] -> [10 x 1 x *1]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+
+Validating network. 16 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 1, Output: 28 x 28 x 16, Kernel: 5 x 5 x 1, Map: 1 x 1 x 16, Stride: 1 x 1 x 1, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 16, Output: 14 x 14 x 16, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 16, Output: 14 x 14 x 32, Kernel: 5 x 5 x 16, Map: 1 x 1 x 32, Stride: 1 x 1 x 16, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 32, Output: 7 x 7 x 32, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+
+20 out of 36 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+(nil): {[ce Gradient[1]] [conv1.c.W Gradient[16 x 25]] [conv1.c.c.b Gradient[16 x 1]] [conv1.c.c.c Gradient[28 x 28 x 16 x *1]] [conv1.c.c.isd Gradient[16 x 1]] [conv1.c.c.m Gradient[16 x 1]] [conv1.c.c.sc Gradient[16 x 1]] [conv1.c.c.y Gradient[28 x 28 x 16 x *1]] [conv1.y Gradient[28 x 28 x 16 x *1]] [conv2.c.W Gradient[32 x 400]] [conv2.c.c.b Gradient[32 x 1]] [conv2.c.c.c Gradient[14 x 14 x 32 x *1]] [conv2.c.c.isd Gradient[32 x 1]] [conv2.c.c.m Gradient[32 x 1]] [conv2.c.c.sc Gradient[32 x 1]] [conv2.c.c.y Gradient[14 x 14 x 32 x *1]] [conv2.y Gradient[14 x 14 x 32 x *1]] [err Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[28 x 28 x 1 x *1]] [features Gradient[28 x 28 x 1 x *1]] [h1.W Gradient[128 x 7 x 7 x 32]] [h1.b Gradient[128 x 1]] [h1.bn Gradient[128 x *1]] [h1.isd Gradient[128 x 1]] [h1.m Gradient[128 x 1]] [h1.sc Gradient[128 x 1]] [h1.t Gradient[128 x *1]] [h1.y Gradient[128 x *1]] [labels Gradient[10 x *1]] [ol.W Gradient[10 x 128]] [ol.b Gradient[10 x 1]] [ol.t Gradient[10 x *1]] [ol.z Gradient[10 x 1 x *1]] [pool1 Gradient[14 x 14 x 16 x *1]] [pool2 Gradient[7 x 7 x 32 x *1]] }
+0x7f50cab10a28: {[h1.sc Value[128 x 1]] }
+0x7f50cab11988: {[h1.W Value[128 x 7 x 7 x 32]] }
+0x7f50cab132e8: {[labels Value[10 x *1]] }
+0x7f50cab13968: {[ol.b Value[10 x 1]] }
+0x7f50cab14c88: {[h1.b Value[128 x 1]] }
+0x7f50cab15368: {[h1.isd Value[128 x 1]] }
+0x7f50cab15da8: {[h1.m Value[128 x 1]] }
+0x7f50cab160c8: {[conv1.c.c.isd Value[16 x 1]] }
+0x7f50cab17e68: {[ol.W Value[10 x 128]] }
+0x7f50cab1ac98: {[ce Value[1]] }
+0x7f50cab1c4f8: {[err Value[1]] }
+0x7f50cabd0b58: {[conv1.c.c.c Value[28 x 28 x 16 x *1]] }
+0x7f50cabd0e98: {[featScaled Value[28 x 28 x 1 x *1]] }
+0x7f50cabd1148: {[conv1.c.c.y Value[28 x 28 x 16 x *1]] }
+0x7f50cabd1f48: {[conv1.y Value[28 x 28 x 16 x *1]] }
+0x7f50cabd2108: {[pool1 Value[14 x 14 x 16 x *1]] }
+0x7f50cabd22c8: {[conv2.c.c.c Value[14 x 14 x 32 x *1]] }
+0x7f50cabd2648: {[conv2.c.c.y Value[14 x 14 x 32 x *1]] }
+0x7f50cabd2b88: {[conv2.y Value[14 x 14 x 32 x *1]] }
+0x7f50cabd2d48: {[pool2 Value[7 x 7 x 32 x *1]] }
+0x7f50cabd2f08: {[h1.t Value[128 x *1]] }
+0x7f50cabd9558: {[h1.bn Value[128 x *1]] }
+0x7f50cabd9a98: {[h1.y Value[128 x *1]] }
+0x7f50cabd9c58: {[ol.t Value[10 x *1]] }
+0x7f50cabd9e18: {[ol.z Value[10 x 1 x *1]] }
+0x7f50cad85a38: {[conv1.c.c.b Value[16 x 1]] }
+0x7f50d5601148: {[conv2.c.c.sc Value[32 x 1]] }
+0x7f50d5601ea8: {[conv2.c.W Value[32 x 400]] }
+0x7f50d5602728: {[conv1.c.W Value[16 x 25]] }
+0x7f50d5602e58: {[conv2.c.c.b Value[32 x 1]] }
+0x7f50d5603b28: {[conv1.c.c.sc Value[16 x 1]] }
+0x7f50d56045d8: {[conv1.c.c.m Value[16 x 1]] }
+0x7f50d5606dd8: {[conv2.c.c.isd Value[32 x 1]] }
+0x7f50d5608478: {[conv2.c.c.m Value[32 x 1]] }
+0x7f50d5609d38: {[featScale Value[1 x 1]] }
+0x7f50d560a658: {[features Value[28 x 28 x 1 x *1]] }
+
+05/13/2016 15:10:47: Final Results: Minibatch[1-313]: err = 0.00660000 * 10000; ce = 0.02083102 * 10000; perplexity = 1.02104950
+
+05/13/2016 15:10:47: Action "test" complete.
+
+05/13/2016 15:10:47: __COMPLETED__
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/baseline.windows.txt
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/baseline.windows.txt
@ -0,0 +1,579 @@
+=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/03_ConvBatchNorm.cntk currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu DeviceId=0 timestamping=true train=[SGD=[maxEpochs=3]] imageLayout="cudnn"
+-------------------------------------------------------------------
+Build info: 
+
+		Built time: May 13 2016 08:06:01
+		Last modified date: Thu May 12 07:31:50 2016
+		Build type: Release
+		Build target: GPU
+		With 1bit-SGD: no
+		CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
+		CUB_PATH: c:\src\cub-1.4.1
+		CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
+		Build Branch: HEAD
+		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+		Built by svcphil on Philly-Pool3
+		Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
+-------------------------------------------------------------------
+Changed current directory to C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData
+05/13/2016 08:16:56: -------------------------------------------------------------------
+05/13/2016 08:16:56: Build info: 
+
+05/13/2016 08:16:56: 		Built time: May 13 2016 08:06:01
+05/13/2016 08:16:56: 		Last modified date: Thu May 12 07:31:50 2016
+05/13/2016 08:16:56: 		Build type: Release
+05/13/2016 08:16:56: 		Build target: GPU
+05/13/2016 08:16:56: 		With 1bit-SGD: no
+05/13/2016 08:16:56: 		CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
+05/13/2016 08:16:56: 		CUB_PATH: c:\src\cub-1.4.1
+05/13/2016 08:16:56: 		CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
+05/13/2016 08:16:56: 		Build Branch: HEAD
+05/13/2016 08:16:56: 		Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
+05/13/2016 08:16:56: 		Built by svcphil on Philly-Pool3
+05/13/2016 08:16:56: 		Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
+05/13/2016 08:16:56: -------------------------------------------------------------------
+
+05/13/2016 08:16:56: Running on Philly-Pool2 at 2016/05/13 08:16:56
+05/13/2016 08:16:56: Command line: 
+C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe  configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/Config/03_ConvBatchNorm.cntk  currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData  RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu  DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData  ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config  OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu  DeviceId=0  timestamping=true  train=[SGD=[maxEpochs=3]]  imageLayout="cudnn"
+
+
+
+05/13/2016 08:16:56: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
+05/13/2016 08:16:56: RootDir = ".."
+ConfigDir = "$RootDir$/Config"
+DataDir   = "$RootDir$/Data"
+OutputDir = "$RootDir$/Output"
+ModelDir  = "$OutputDir$/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "$ModelDir$/03_ConvBatchNorm"
+ndlMacros = "$ConfigDir$/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "$ConfigDir$/03_ConvBatchNorm.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.5:0.1
+        momentumPerMB = 0.9
+        maxEpochs = 2
+        batchNormalizationBlendTimeConstant=0:1#INF
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+test = [
+    action = "test"
+    minibatchSize = 32
+    modelPath=$ModelDir$/03_ConvBatchNorm
+    NDLNetworkBuilder = [
+        networkDescription = "$ConfigDir$/03_ConvBatchNorm.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "$DataDir$/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu
+DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData
+ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config
+OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 08:16:56: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED)  <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 08:16:56: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+05/13/2016 08:16:56: RootDir = ".."
+ConfigDir = "../Config"
+DataDir   = "../Data"
+OutputDir = "../Output"
+ModelDir  = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu/Models"
+deviceId = 0
+imageLayout = "cudnn"
+command = train:test
+precision = "float"
+modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm"
+ndlMacros = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/Macros.ndl"
+traceLevel=1
+numMBsToShowResult=500
+initOnCPUOnly=true
+train = [
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/03_ConvBatchNorm.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.5:0.1
+        momentumPerMB = 0.9
+        maxEpochs = 2
+        batchNormalizationBlendTimeConstant=0:1#INF
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+test = [
+    action = "test"
+    minibatchSize = 32
+    modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm
+    NDLNetworkBuilder = [
+        networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/03_ConvBatchNorm.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu
+DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData
+ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config
+OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu
+DeviceId=0
+timestamping=true
+train=[SGD=[maxEpochs=3]]
+imageLayout="cudnn"
+
+05/13/2016 08:16:56: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+
+05/13/2016 08:16:56: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+configparameters: 03_ConvBatchNorm.cntk:command=train:test
+configparameters: 03_ConvBatchNorm.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config
+configparameters: 03_ConvBatchNorm.cntk:currentDirectory=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData
+configparameters: 03_ConvBatchNorm.cntk:DataDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData
+configparameters: 03_ConvBatchNorm.cntk:deviceId=0
+configparameters: 03_ConvBatchNorm.cntk:imageLayout=cudnn
+configparameters: 03_ConvBatchNorm.cntk:initOnCPUOnly=true
+configparameters: 03_ConvBatchNorm.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu/Models
+configparameters: 03_ConvBatchNorm.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm
+configparameters: 03_ConvBatchNorm.cntk:ndlMacros=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/Macros.ndl
+configparameters: 03_ConvBatchNorm.cntk:numMBsToShowResult=500
+configparameters: 03_ConvBatchNorm.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu
+configparameters: 03_ConvBatchNorm.cntk:precision=float
+configparameters: 03_ConvBatchNorm.cntk:RootDir=..
+configparameters: 03_ConvBatchNorm.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu
+configparameters: 03_ConvBatchNorm.cntk:test=[
+    action = "test"
+    minibatchSize = 32
+    modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm
+    NDLNetworkBuilder = [
+        networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/03_ConvBatchNorm.ndl"
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData/Test-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+]
+
+configparameters: 03_ConvBatchNorm.cntk:timestamping=true
+configparameters: 03_ConvBatchNorm.cntk:traceLevel=1
+configparameters: 03_ConvBatchNorm.cntk:train=[
+    action = "train"
+    NDLNetworkBuilder = [
+        networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Image\MNIST\Config/03_ConvBatchNorm.ndl"
+    ]
+    SGD = [
+        epochSize = 60000
+        minibatchSize = 32
+        learningRatesPerMB = 0.5:0.1
+        momentumPerMB = 0.9
+        maxEpochs = 2
+        batchNormalizationBlendTimeConstant=0:1#INF
+    ]
+    reader = [
+        readerType = "CNTKTextFormatReader"
+        file = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu\TestData/Train-28x28_cntk_text.txt"
+        input = [
+            features = [
+                dim = 784
+                format = "dense"
+            ]
+            labels = [
+                dim = 10
+                format = "dense"
+            ]
+        ]
+    ]
+] [SGD=[maxEpochs=3]]
+
+05/13/2016 08:16:56: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+05/13/2016 08:16:56: Commands: train test
+05/13/2016 08:16:56: Precision = "float"
+05/13/2016 08:16:56: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm
+05/13/2016 08:16:56: CNTKCommandTrainInfo: train : 3
+05/13/2016 08:16:56: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3
+
+05/13/2016 08:16:56: ##############################################################################
+05/13/2016 08:16:56: #                                                                            #
+05/13/2016 08:16:56: # Action "train"                                                             #
+05/13/2016 08:16:56: #                                                                            #
+05/13/2016 08:16:56: ##############################################################################
+
+05/13/2016 08:16:56: CNTKCommandTrainBegin: train
+NDLBuilder Using GPU 0
+
+05/13/2016 08:16:57: Creating virgin network.
+
+Post-processing network...
+
+3 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 36 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 128]
+Validating --> h1.W = LearnableParameter() :  -> [128 x 1568]
+Validating --> conv2.c.W = LearnableParameter() :  -> [32 x 400]
+Validating --> conv1.c.W = LearnableParameter() :  -> [16 x 25]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [28 x 28 x 1 x *]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [28 x 28 x 1 x *] -> [28 x 28 x 1 x *]
+Validating --> conv1.c.c.c = Convolution (conv1.c.W, featScaled) : [16 x 25], [28 x 28 x 1 x *] -> [28 x 28 x 16 x *]
+Validating --> conv1.c.c.sc = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.b = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.m = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.isd = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.y = BatchNormalization (conv1.c.c.c, conv1.c.c.sc, conv1.c.c.b, conv1.c.c.m, conv1.c.c.isd) : [28 x 28 x 16 x *], [16 x 1], [16 x 1], [16 x 1], [16 x 1] -> [28 x 28 x 16 x *]
+Validating --> conv1.y = RectifiedLinear (conv1.c.c.y) : [28 x 28 x 16 x *] -> [28 x 28 x 16 x *]
+Validating --> pool1 = MaxPooling (conv1.y) : [28 x 28 x 16 x *] -> [14 x 14 x 16 x *]
+Validating --> conv2.c.c.c = Convolution (conv2.c.W, pool1) : [32 x 400], [14 x 14 x 16 x *] -> [14 x 14 x 32 x *]
+Validating --> conv2.c.c.sc = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.b = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.m = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.isd = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.y = BatchNormalization (conv2.c.c.c, conv2.c.c.sc, conv2.c.c.b, conv2.c.c.m, conv2.c.c.isd) : [14 x 14 x 32 x *], [32 x 1], [32 x 1], [32 x 1], [32 x 1] -> [14 x 14 x 32 x *]
+Validating --> conv2.y = RectifiedLinear (conv2.c.c.y) : [14 x 14 x 32 x *] -> [14 x 14 x 32 x *]
+Validating --> pool2 = MaxPooling (conv2.y) : [14 x 14 x 32 x *] -> [7 x 7 x 32 x *]
+
+h1.t Times operation: For legacy compatibility, the sample layout of left input (h1.W LearnableParameter operation) was patched to [128 x 7 x 7 x 32] (from [128 x 1568])
+Validating --> h1.t = Times (h1.W, pool2) : [128 x 7 x 7 x 32], [7 x 7 x 32 x *] -> [128 x *]
+Validating --> h1.sc = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.b = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.m = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.isd = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.bn = BatchNormalization (h1.t, h1.sc, h1.b, h1.m, h1.isd) : [128 x *], [128 x 1], [128 x 1], [128 x 1], [128 x 1] -> [128 x *]
+Validating --> h1.y = RectifiedLinear (h1.bn) : [128 x *] -> [128 x *]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *] -> [10 x *]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *], [10 x 1] -> [10 x 1 x *]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+
+Validating network. 16 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 1, Output: 28 x 28 x 16, Kernel: 5 x 5 x 1, Map: 1 x 1 x 16, Stride: 1 x 1 x 1, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 16, Output: 14 x 14 x 16, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 16, Output: 14 x 14 x 32, Kernel: 5 x 5 x 16, Map: 1 x 1 x 32, Stride: 1 x 1 x 16, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 32, Output: 7 x 7 x 32, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+
+20 out of 36 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+05/13/2016 08:16:58: Created model with 36 nodes on GPU 0.
+
+05/13/2016 08:16:58: Training criterion node(s):
+05/13/2016 08:16:58: 	ce = CrossEntropyWithSoftmax
+
+05/13/2016 08:16:58: Evaluation criterion node(s):
+
+05/13/2016 08:16:58: 	err = ErrorPrediction
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+0000000000000000: {[conv1.c.c.isd Gradient[16 x 1]] [conv1.c.c.m Gradient[16 x 1]] [conv2.c.c.isd Gradient[32 x 1]] [conv2.c.c.m Gradient[32 x 1]] [err Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[28 x 28 x 1 x *]] [features Gradient[28 x 28 x 1 x *]] [h1.isd Gradient[128 x 1]] [h1.m Gradient[128 x 1]] [labels Gradient[10 x *]] }
+00000093B2DC5E20: {[features Value[28 x 28 x 1 x *]] }
+00000093CB445890: {[ol.W Value[10 x 128]] }
+00000093CB446290: {[ol.b Value[10 x 1]] }
+00000093CB5EF4D0: {[conv2.c.c.m Value[32 x 1]] }
+00000093CB5EF570: {[conv1.c.W Value[16 x 25]] }
+00000093CB5EF610: {[h1.sc Value[128 x 1]] }
+00000093CB5EF9D0: {[conv2.c.c.b Value[32 x 1]] }
+00000093CB5EFBB0: {[h1.b Value[128 x 1]] }
+00000093CB5EFCF0: {[h1.isd Value[128 x 1]] }
+00000093CB5EFD90: {[h1.m Value[128 x 1]] }
+00000093CB5F03D0: {[conv1.c.c.b Value[16 x 1]] }
+00000093CB5F0470: {[conv1.c.c.sc Value[16 x 1]] }
+00000093CB5F05B0: {[conv1.c.c.isd Value[16 x 1]] }
+00000093CB5F06F0: {[conv2.c.W Value[32 x 400]] }
+00000093CB5F0830: {[conv2.c.c.sc Value[32 x 1]] }
+00000093CB5F08D0: {[conv2.c.c.isd Value[32 x 1]] }
+00000093CB5F0970: {[labels Value[10 x *]] }
+00000093CB5F0BF0: {[conv1.c.c.m Value[16 x 1]] }
+00000093CB5F0D30: {[featScale Value[1 x 1]] }
+00000093CB5F0DD0: {[h1.W Value[128 x 7 x 7 x 32]] }
+00000093D1AAE180: {[conv2.c.c.b Gradient[32 x 1]] }
+00000093D1AAE360: {[ol.t Gradient[10 x *]] [pool1 Gradient[14 x 14 x 16 x *]] [pool2 Gradient[7 x 7 x 32 x *]] }
+00000093D1AAE400: {[h1.W Gradient[128 x 7 x 7 x 32]] }
+00000093D1AAE5E0: {[conv1.c.c.c Gradient[28 x 28 x 16 x *]] [conv1.y Value[28 x 28 x 16 x *]] }
+00000093D1AAE680: {[h1.b Gradient[128 x 1]] }
+00000093D1AAE9A0: {[err Value[1]] }
+00000093D1AAED60: {[ol.z Value[10 x 1 x *]] }
+00000093D1AAEE00: {[ce Value[1]] }
+00000093D1AAEF40: {[conv1.c.c.y Value[28 x 28 x 16 x *]] }
+00000093D1AAF080: {[ol.b Gradient[10 x 1]] }
+00000093D1AAF120: {[conv1.c.W Gradient[16 x 25]] [conv2.c.c.c Value[14 x 14 x 32 x *]] }
+00000093D1AAF1C0: {[h1.bn Gradient[128 x *]] [ol.t Value[10 x *]] }
+00000093D1AAF260: {[conv1.c.c.y Gradient[28 x 28 x 16 x *]] [pool1 Value[14 x 14 x 16 x *]] }
+00000093D1AAF440: {[ol.W Gradient[10 x 128]] [ol.z Gradient[10 x 1 x *]] }
+00000093D1AAFB20: {[h1.bn Value[128 x *]] }
+00000093D1AAFDA0: {[conv1.c.c.c Value[28 x 28 x 16 x *]] }
+00000093D1AAFE40: {[conv2.c.c.y Value[14 x 14 x 32 x *]] }
+00000093D1AAFF80: {[conv2.c.c.y Gradient[14 x 14 x 32 x *]] [pool2 Value[7 x 7 x 32 x *]] }
+00000093D1AB0020: {[conv2.c.W Gradient[32 x 400]] [h1.t Gradient[128 x *]] [h1.y Value[128 x *]] }
+00000093D1AB00C0: {[h1.sc Gradient[128 x 1]] [h1.y Gradient[128 x *]] }
+00000093D1AB0200: {[ce Gradient[1]] }
+00000093D1AB03E0: {[conv1.c.c.sc Gradient[16 x 1]] [conv1.y Gradient[28 x 28 x 16 x *]] }
+00000093D1AB0480: {[conv1.c.c.b Gradient[16 x 1]] [conv2.c.c.c Gradient[14 x 14 x 32 x *]] [conv2.y Value[14 x 14 x 32 x *]] }
+00000093D1AB0660: {[featScaled Value[28 x 28 x 1 x *]] }
+00000093D1AB0700: {[conv2.c.c.sc Gradient[32 x 1]] [conv2.y Gradient[14 x 14 x 32 x *]] [h1.t Value[128 x *]] }
+
+05/13/2016 08:16:58: No PreCompute nodes found, skipping PreCompute step.
+
+05/13/2016 08:16:58: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 303.7 samples
+
+05/13/2016 08:16:58: Starting minibatch loop.
+05/13/2016 08:17:02:  Epoch[ 1 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.17330922 * 16000; err = 0.05325000 * 16000; time = 4.3656s; samplesPerSecond = 3665.0
+05/13/2016 08:17:04:  Epoch[ 1 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.07897408 * 16000; err = 0.02456250 * 16000; time = 1.7980s; samplesPerSecond = 8899.0
+05/13/2016 08:17:06:  Epoch[ 1 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.06288062 * 16000; err = 0.02012500 * 16000; time = 1.7989s; samplesPerSecond = 8894.1
+05/13/2016 08:17:07: Finished Epoch[ 1 of 3]: [Training] ce = 0.09585953 * 60000; err = 0.02956667 * 60000; totalSamplesSeen = 60000; learningRatePerSample = 0.015625; epochTime=9.34707s
+05/13/2016 08:17:07: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm.1'
+Setting batch normalization blend time constant to 1.#INF.
+
+05/13/2016 08:17:07: Starting Epoch 2: learning rate per sample = 0.003125  effective momentum = 0.900000  momentum as time constant = 303.7 samples
+
+05/13/2016 08:17:07: Starting minibatch loop.
+05/13/2016 08:17:09:  Epoch[ 2 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.02381749 * 16000; err = 0.00700000 * 16000; time = 1.7975s; samplesPerSecond = 8901.2
+05/13/2016 08:17:11:  Epoch[ 2 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.02147904 * 16000; err = 0.00687500 * 16000; time = 1.7971s; samplesPerSecond = 8903.3
+05/13/2016 08:17:13:  Epoch[ 2 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.01875302 * 16000; err = 0.00581250 * 16000; time = 1.7965s; samplesPerSecond = 8906.1
+05/13/2016 08:17:14: Finished Epoch[ 2 of 3]: [Training] ce = 0.02042361 * 60000; err = 0.00626667 * 60000; totalSamplesSeen = 120000; learningRatePerSample = 0.003125; epochTime=6.7551s
+05/13/2016 08:17:14: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm.2'
+
+05/13/2016 08:17:14: Starting Epoch 3: learning rate per sample = 0.003125  effective momentum = 0.900000  momentum as time constant = 303.7 samples
+
+05/13/2016 08:17:14: Starting minibatch loop.
+05/13/2016 08:17:16:  Epoch[ 3 of 3]-Minibatch[   1- 500, 26.67%]: ce = 0.01552748 * 16000; err = 0.00400000 * 16000; time = 1.7980s; samplesPerSecond = 8899.0
+05/13/2016 08:17:18:  Epoch[ 3 of 3]-Minibatch[ 501-1000, 53.33%]: ce = 0.01295741 * 16000; err = 0.00356250 * 16000; time = 1.7961s; samplesPerSecond = 8908.2
+05/13/2016 08:17:20:  Epoch[ 3 of 3]-Minibatch[1001-1500, 80.00%]: ce = 0.01382423 * 16000; err = 0.00393750 * 16000; time = 1.7964s; samplesPerSecond = 8906.7
+05/13/2016 08:17:21: Finished Epoch[ 3 of 3]: [Training] ce = 0.01415997 * 60000; err = 0.00391667 * 60000; totalSamplesSeen = 180000; learningRatePerSample = 0.003125; epochTime=6.75556s
+05/13/2016 08:17:21: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160513081543.861015\CNTKTextFormatReader\Examples\Image\MNIST_03_ConvBatchNorm@release_gpu/Models/03_ConvBatchNorm'
+05/13/2016 08:17:21: CNTKCommandTrainEnd: train
+
+05/13/2016 08:17:21: Action "train" complete.
+
+
+05/13/2016 08:17:21: ##############################################################################
+05/13/2016 08:17:21: #                                                                            #
+05/13/2016 08:17:21: # Action "test"                                                              #
+05/13/2016 08:17:21: #                                                                            #
+05/13/2016 08:17:21: ##############################################################################
+
+
+Post-processing network...
+
+3 roots:
+	ce = CrossEntropyWithSoftmax()
+	err = ErrorPrediction()
+	ol.z = Plus()
+
+Validating network. 36 nodes to process in pass 1.
+
+Validating --> labels = InputValue() :  -> [10 x *1]
+Validating --> ol.W = LearnableParameter() :  -> [10 x 128]
+Validating --> h1.W = LearnableParameter() :  -> [128 x 7 x 7 x 32]
+Validating --> conv2.c.W = LearnableParameter() :  -> [32 x 400]
+Validating --> conv1.c.W = LearnableParameter() :  -> [16 x 25]
+Validating --> featScale = LearnableParameter() :  -> [1 x 1]
+Validating --> features = InputValue() :  -> [28 x 28 x 1 x *1]
+Validating --> featScaled = ElementTimes (featScale, features) : [1 x 1], [28 x 28 x 1 x *1] -> [28 x 28 x 1 x *1]
+Validating --> conv1.c.c.c = Convolution (conv1.c.W, featScaled) : [16 x 25], [28 x 28 x 1 x *1] -> [28 x 28 x 16 x *1]
+Validating --> conv1.c.c.sc = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.b = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.m = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.isd = LearnableParameter() :  -> [16 x 1]
+Validating --> conv1.c.c.y = BatchNormalization (conv1.c.c.c, conv1.c.c.sc, conv1.c.c.b, conv1.c.c.m, conv1.c.c.isd) : [28 x 28 x 16 x *1], [16 x 1], [16 x 1], [16 x 1], [16 x 1] -> [28 x 28 x 16 x *1]
+Validating --> conv1.y = RectifiedLinear (conv1.c.c.y) : [28 x 28 x 16 x *1] -> [28 x 28 x 16 x *1]
+Validating --> pool1 = MaxPooling (conv1.y) : [28 x 28 x 16 x *1] -> [14 x 14 x 16 x *1]
+Validating --> conv2.c.c.c = Convolution (conv2.c.W, pool1) : [32 x 400], [14 x 14 x 16 x *1] -> [14 x 14 x 32 x *1]
+Validating --> conv2.c.c.sc = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.b = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.m = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.isd = LearnableParameter() :  -> [32 x 1]
+Validating --> conv2.c.c.y = BatchNormalization (conv2.c.c.c, conv2.c.c.sc, conv2.c.c.b, conv2.c.c.m, conv2.c.c.isd) : [14 x 14 x 32 x *1], [32 x 1], [32 x 1], [32 x 1], [32 x 1] -> [14 x 14 x 32 x *1]
+Validating --> conv2.y = RectifiedLinear (conv2.c.c.y) : [14 x 14 x 32 x *1] -> [14 x 14 x 32 x *1]
+Validating --> pool2 = MaxPooling (conv2.y) : [14 x 14 x 32 x *1] -> [7 x 7 x 32 x *1]
+Validating --> h1.t = Times (h1.W, pool2) : [128 x 7 x 7 x 32], [7 x 7 x 32 x *1] -> [128 x *1]
+Validating --> h1.sc = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.b = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.m = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.isd = LearnableParameter() :  -> [128 x 1]
+Validating --> h1.bn = BatchNormalization (h1.t, h1.sc, h1.b, h1.m, h1.isd) : [128 x *1], [128 x 1], [128 x 1], [128 x 1], [128 x 1] -> [128 x *1]
+Validating --> h1.y = RectifiedLinear (h1.bn) : [128 x *1] -> [128 x *1]
+Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *1] -> [10 x *1]
+Validating --> ol.b = LearnableParameter() :  -> [10 x 1]
+Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *1], [10 x 1] -> [10 x 1 x *1]
+Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+
+Validating network. 16 nodes to process in pass 2.
+
+
+Validating network, final pass.
+
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 1, Output: 28 x 28 x 16, Kernel: 5 x 5 x 1, Map: 1 x 1 x 16, Stride: 1 x 1 x 1, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+Using cuDNN convolution engine for geometry: Input: 28 x 28 x 16, Output: 14 x 14 x 16, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 16, Output: 14 x 14 x 32, Kernel: 5 x 5 x 16, Map: 1 x 1 x 32, Stride: 1 x 1 x 16, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+Using cuDNN convolution engine for geometry: Input: 14 x 14 x 32, Output: 7 x 7 x 32, Kernel: 2 x 2 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
+
+Using CNTK batch normalization engine.
+
+
+20 out of 36 nodes do not share the minibatch layout with the input data.
+
+Post-processing network complete.
+
+evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.
+
+
+Allocating matrices for forward and/or backward propagation.
+
+Memory Sharing Structure:
+
+0000000000000000: {[ce Gradient[1]] [conv1.c.W Gradient[16 x 25]] [conv1.c.c.b Gradient[16 x 1]] [conv1.c.c.c Gradient[28 x 28 x 16 x *1]] [conv1.c.c.isd Gradient[16 x 1]] [conv1.c.c.m Gradient[16 x 1]] [conv1.c.c.sc Gradient[16 x 1]] [conv1.c.c.y Gradient[28 x 28 x 16 x *1]] [conv1.y Gradient[28 x 28 x 16 x *1]] [conv2.c.W Gradient[32 x 400]] [conv2.c.c.b Gradient[32 x 1]] [conv2.c.c.c Gradient[14 x 14 x 32 x *1]] [conv2.c.c.isd Gradient[32 x 1]] [conv2.c.c.m Gradient[32 x 1]] [conv2.c.c.sc Gradient[32 x 1]] [conv2.c.c.y Gradient[14 x 14 x 32 x *1]] [conv2.y Gradient[14 x 14 x 32 x *1]] [err Gradient[1]] [featScale Gradient[1 x 1]] [featScaled Gradient[28 x 28 x 1 x *1]] [features Gradient[28 x 28 x 1 x *1]] [h1.W Gradient[128 x 7 x 7 x 32]] [h1.b Gradient[128 x 1]] [h1.bn Gradient[128 x *1]] [h1.isd Gradient[128 x 1]] [h1.m Gradient[128 x 1]] [h1.sc Gradient[128 x 1]] [h1.t Gradient[128 x *1]] [h1.y Gradient[128 x *1]] [labels Gradient[10 x *1]] [ol.W Gradient[10 x 128]] [ol.b Gradient[10 x 1]] [ol.t Gradient[10 x *1]] [ol.z Gradient[10 x 1 x *1]] [pool1 Gradient[14 x 14 x 16 x *1]] [pool2 Gradient[7 x 7 x 32 x *1]] }
+00000093D1AAEE00: {[pool2 Value[7 x 7 x 32 x *1]] }
+00000093D1AAEEA0: {[conv2.c.c.y Value[14 x 14 x 32 x *1]] }
+00000093D1AAF300: {[h1.y Value[128 x *1]] }
+00000093D1AAF580: {[conv2.y Value[14 x 14 x 32 x *1]] }
+00000093D1AAF760: {[h1.bn Value[128 x *1]] }
+00000093D1AAF940: {[ol.t Value[10 x *1]] }
+00000093D1AB0200: {[h1.t Value[128 x *1]] }
+00000093D1AB0700: {[ol.z Value[10 x 1 x *1]] }
+00000093D1CA31E0: {[conv2.c.c.sc Value[32 x 1]] }
+00000093D1CA3320: {[conv2.c.c.isd Value[32 x 1]] }
+00000093D1CA35A0: {[conv2.c.W Value[32 x 400]] }
+00000093D1CA3BE0: {[conv2.c.c.m Value[32 x 1]] }
+00000093D1CA3FA0: {[conv2.c.c.b Value[32 x 1]] }
+00000093D1CA40E0: {[featScale Value[1 x 1]] }
+00000093D1CA4720: {[features Value[28 x 28 x 1 x *1]] }
+00000093D1CA4C20: {[conv1.c.c.isd Value[16 x 1]] }
+00000093D1CA5440: {[conv1.c.c.m Value[16 x 1]] }
+00000093D1CA54E0: {[conv1.c.c.b Value[16 x 1]] }
+00000093D1CA5580: {[conv1.c.c.sc Value[16 x 1]] }
+00000093D1CA5620: {[conv1.c.W Value[16 x 25]] }
+00000093D1CA5A80: {[ol.b Value[10 x 1]] }
+00000093D1CA5B20: {[ol.W Value[10 x 128]] }
+00000093D1CA60C0: {[err Value[1]] }
+00000093D1CA6160: {[h1.W Value[128 x 7 x 7 x 32]] }
+00000093D1CA6200: {[conv1.c.c.c Value[28 x 28 x 16 x *1]] }
+00000093D1CA62A0: {[h1.isd Value[128 x 1]] }
+00000093D1CA6340: {[ce Value[1]] }
+00000093D1CA6480: {[h1.b Value[128 x 1]] }
+00000093D1CA6660: {[conv1.c.c.y Value[28 x 28 x 16 x *1]] }
+00000093D1CA6700: {[featScaled Value[28 x 28 x 1 x *1]] }
+00000093D1CA68E0: {[h1.sc Value[128 x 1]] }
+00000093D1CA6980: {[conv1.y Value[28 x 28 x 16 x *1]] }
+00000093D1CA6AC0: {[pool1 Value[14 x 14 x 16 x *1]] }
+00000093D1CA6B60: {[conv2.c.c.c Value[14 x 14 x 32 x *1]] }
+00000093D1CA6D40: {[h1.m Value[128 x 1]] }
+00000093D1CA6DE0: {[labels Value[10 x *1]] }
+
+05/13/2016 08:17:32: Final Results: Minibatch[1-313]: err = 0.00710000 * 10000; ce = 0.02063067 * 10000; perplexity = 1.02084496
+
+05/13/2016 08:17:32: Action "test" complete.
+
+05/13/2016 08:17:32: __COMPLETED__
--- a/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/run-test
+++ b/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/MNIST/03_ConvBatchNorm/run-test
@ -0,0 +1,11 @@
+#!/bin/bash
+
+. $TEST_DIR/../run-test-common
+
+cntkrun $CNTKTextConfigDir/03_ConvBatchNorm.cntk  "train=[SGD=[maxEpochs=3]] imageLayout=\"$imageLayout\""
+ExitCode=$?
+
+# Delete the test data if copied
+[[ "$Copied" -eq "1" ]] && rm -rf "$DataDir"
+
+exit $ExitCode
--- a/Показать больше
+++ b/Показать больше