Merge remote-tracking branch 'origin/master' into jdroppo/cudnn-rnn-lstm

Conflicts:
	Source/Math/Math.vcxproj
	Source/Math/Math.vcxproj.filters
	Source/Math/MathCUDA.vcxproj.filters
This commit is contained in:
Jasha Droppo 2016-05-20 11:20:07 -07:00
Родитель 1360f0fb15 f76a944bce
Коммит 5558144363
694 изменённых файлов: 505895 добавлений и 464976 удалений

4
.gitattributes поставляемый
Просмотреть файл

@ -40,6 +40,10 @@ run-test-common text eol=lf
run-timit-test-common text eol=lf
make_binary_drop_linux text eol=lf
# Used from Unix / Cygwin 'md5sum -c', needs to have LF line endings:
Tests/EndToEndTests/Examples/Speech/TIMIT/WriteBottleneck/expected_output_md5sum.*.txt eol=lf
Tests/EndToEndTests/Examples/Speech/TIMIT/WriteScaledLogLike/expected_output_md5sum.*.txt eol=lf
Makefile text
*.sln text
*.vcxproj text

4
.gitignore поставляемый
Просмотреть файл

@ -152,7 +152,9 @@ ModelManifest.xml
# Python
*.pyc
__pychache__/
__pycache__/
contrib/Python/doc/_build/*
contrib/Python/_cntk_default/*
# =========================
# Windows detritus

408
CNTK.sln
Просмотреть файл

@ -9,6 +9,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTK", "Source\CNTK\CNTK.vc
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
{EB2BE26F-6BD4-4274-971F-86D080779DD1} = {EB2BE26F-6BD4-4274-971F-86D080779DD1}
{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}
EndProjectSection
@ -654,8 +655,6 @@ EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{850008BC-36B0-4A0A-BD0C-B6D5C2184227}"
ProjectSection(SolutionItems) = preProject
Examples\Text\PennTreebank\Config\rnn.cntk = Examples\Text\PennTreebank\Config\rnn.cntk
Examples\Text\PennTreebank\Config\S2SAutoEncoder.cntk = Examples\Text\PennTreebank\Config\S2SAutoEncoder.cntk
Examples\Text\PennTreebank\Config\S2SLib.bs = Examples\Text\PennTreebank\Config\S2SLib.bs
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLU", "SLU", "{E6DC3B7D-303D-4A54-B040-D8DCF8C56E17}"
@ -710,45 +709,30 @@ EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Text", "Text", "{439BE0E0-FABE-403D-BF2C-A41FB8A60616}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "MNIST", "MNIST", "{63C6816D-66BF-487E-B541-094142C8272B}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Image\MNIST\README.txt = Tests\EndToEndTests\Examples\Image\MNIST\README.txt
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "02_Convolution", "02_Convolution", "{6F1D0CE1-0F18-4B4C-9581-1F2146C8D300}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.linux.debug.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.linux.debug.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.linux.debug.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.linux.debug.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.linux.release.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.linux.release.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.linux.release.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.linux.release.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.windows.debug.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.windows.debug.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.windows.debug.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.windows.debug.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.windows.release.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.windows.release.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.windows.release.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.windows.release.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.linux.txt = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.linux.txt
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\baseline.windows.txt
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\run-test = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\run-test
Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\testcases.yml = Tests\EndToEndTests\Examples\Image\MNIST\02_Convolution\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "01_OneHidden", "01_OneHidden", "{A0B366FE-2EEA-4E32-9AED-12C46409C30C}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.linux.debug.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.linux.debug.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.linux.debug.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.linux.debug.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.linux.release.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.linux.release.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.linux.release.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.linux.release.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.windows.debug.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.windows.debug.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.windows.debug.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.windows.debug.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.windows.release.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.windows.release.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.windows.release.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.windows.release.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.linux.txt = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.linux.txt
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\baseline.windows.txt
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\run-test = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\run-test
Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\testcases.yml = Tests\EndToEndTests\Examples\Image\MNIST\01_OneHidden\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "03_ConvBatchNorm", "03_ConvBatchNorm", "{BD783D50-47E2-485F-BDAF-29BD40D84645}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.debug.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.debug.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.debug.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.debug.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.release.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.release.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.release.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.release.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.debug.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.debug.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.debug.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.debug.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.release.cpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.release.cpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.release.gpu.txt = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.release.gpu.txt
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.txt = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.txt
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.txt
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\run-test = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\run-test
Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\testcases.yml = Tests\EndToEndTests\Examples\Image\MNIST\03_ConvBatchNorm\testcases.yml
EndProjectSection
@ -888,12 +872,21 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NetworkTests", "Tests\UnitT
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
{EB2BE26F-6BD4-4274-971F-86D080779DD1} = {EB2BE26F-6BD4-4274-971F-86D080779DD1}
{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Text", "Text", "{8656B71D-E24C-4AC2-8BE4-C07B415A3E15}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SequenceClassification", "SequenceClassification", "{E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Text\SequenceClassification\baseline.linux.cpu.txt = Tests\EndToEndTests\Text\SequenceClassification\baseline.linux.cpu.txt
Tests\EndToEndTests\Text\SequenceClassification\baseline.linux.gpu.txt = Tests\EndToEndTests\Text\SequenceClassification\baseline.linux.gpu.txt
Tests\EndToEndTests\Text\SequenceClassification\baseline.windows.cpu.txt = Tests\EndToEndTests\Text\SequenceClassification\baseline.windows.cpu.txt
Tests\EndToEndTests\Text\SequenceClassification\baseline.windows.gpu.txt = Tests\EndToEndTests\Text\SequenceClassification\baseline.windows.gpu.txt
Tests\EndToEndTests\Text\SequenceClassification\run-test = Tests\EndToEndTests\Text\SequenceClassification\run-test
Tests\EndToEndTests\Text\SequenceClassification\testcases.yml = Tests\EndToEndTests\Text\SequenceClassification\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Miscellaneous", "Miscellaneous", "{8629430A-821E-43BA-AEC5-8B2CF31A2A7A}"
EndProject
@ -976,6 +969,306 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLU", "SLU", "{181664AC-4C9
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Common", "Source\Common\Common.vcxproj", "{86883653-8A61-4038-81A0-2379FAE4200A}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CompositeDataReader", "Source\Readers\CompositeDataReader\CompositeDataReader.vcxproj", "{7B7A563D-AA8E-4660-A805-D50235A02120}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CNTKTextFormatReader", "CNTKTextFormatReader", "{99FAAACE-C360-43CF-B706-20621F164484}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Examples", "Examples", "{629761D1-7A05-409A-B62B-FC1CCC0D6EED}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Image", "Image", "{D4302516-C77F-4FAF-82FB-18DB39F5A53B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelTraining", "ParallelTraining", "{06BE675D-80DD-419A-8E00-26953EF11F25}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\SimpleMultiGPU.cntk = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\SimpleMultiGPU.cntk
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Speech", "Speech", "{5642F047-490B-4ABD-8113-8563C872B39F}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Image", "Image", "{2B6CCAB6-A92A-483C-9FDB-8412FA4DC42F}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Other", "Other", "{225F5A3A-7CAF-4C71-9143-3AD2AC4D47A3}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "MNIST", "MNIST", "{EBD36FD9-FE5B-420E-A572-DC6117300DB3}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\run-test-common = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\run-test-common
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Miscellaneous", "Miscellaneous", "{08D284FA-2914-4B35-A89C-896DBA2B4484}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CIFAR-10", "CIFAR-10", "{95FAC6A0-6AE7-4947-9DFD-498FE71311AD}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\run-test-common = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\run-test-common
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{A877E526-89C1-422E-9F90-4DDE84135A36}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\01_Conv.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\01_Conv.cntk
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\02_BatchNormConv.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\02_BatchNormConv.cntk
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\05_ConvLocal.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\Config\05_ConvLocal.cntk
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "01_Convolution", "01_Convolution", "{071D8449-D080-4141-869D-600CC3C2A0BE}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.windows.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\README.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\run-test
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "02_BatchNormConv", "02_BatchNormConv", "{D3A74C52-BC74-4DA3-BE93-8F4241D54EE0}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.linux.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.linux.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.windows.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\README.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\run-test
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "05_ConvLocal", "05_ConvLocal", "{EC466625-BC66-41DF-B55A-EB28AFABE24E}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\baseline.linux.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\baseline.linux.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\baseline.windows.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\README.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\run-test
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\Miscellaneous\CIFAR-10\05_ConvLocal\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "01_OneHidden", "01_OneHidden", "{34D578DB-0101-45C4-9DF0-37DE9AB87C65}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\baseline.linux.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\baseline.windows.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\README.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\run-test
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\01_OneHidden\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "02_Convolution", "02_Convolution", "{1FE04815-E02E-498C-B276-6D058D46D754}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\baseline.linux.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\baseline.windows.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\README.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\run-test
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\02_Convolution\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "03_ConvBatchNorm", "03_ConvBatchNorm", "{2A125ED5-9C8A-4BDF-A200-862104289608}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\baseline.linux.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\baseline.windows.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\README.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\run-test
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\03_ConvBatchNorm\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{E9207003-B860-4D57-B2CA-09AF52FF191F}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\01_OneHidden.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\01_OneHidden.cntk
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\01_OneHidden.ndl = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\01_OneHidden.ndl
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\02_Convolution.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\02_Convolution.cntk
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\02_Convolution.ndl = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\02_Convolution.ndl
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\03_ConvBatchNorm.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\03_ConvBatchNorm.cntk
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\03_ConvBatchNorm.ndl = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\03_ConvBatchNorm.ndl
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\Macros.ndl = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Image\MNIST\Config\Macros.ndl
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Simple2d", "Simple2d", "{50420947-E502-40B4-8739-2C0BADD93BEE}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "MultiGpu", "MultiGpu", "{935E5A95-888D-4922-AB5A-E9C11D65E974}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.linux.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.linux.cpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.linux.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.linux.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.windows.cpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\baseline.windows.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\README.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\run-test
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\MultiGpu\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Simple", "Simple", "{773313DD-69DD-463F-ADC9-E8A902A5223C}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.linux.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.linux.cpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.linux.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.linux.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.windows.cpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\baseline.windows.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\README.txt
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\run-test
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Simple\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{C8E2EF3B-CCBF-4BDD-8127-2252626FB22B}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Config\Multigpu.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Config\Multigpu.cntk
Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Config\Simple.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Examples\Other\Simple2d\Config\Simple.cntk
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "QuickE2E", "QuickE2E", "{A4F79A83-DE30-40FA-88F4-86304C89AC7F}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.linux.txt = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.linux.txt
Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.windows.txt = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\baseline.windows.txt
Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\Image_QuickE2E.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\Image_QuickE2E.cntk
Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\README.txt
Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\run-test
Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Image\QuickE2E\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Simple", "Simple", "{CC47AF62-2558-455F-81CB-36901AF033B0}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.linux.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.linux.cpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.linux.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.linux.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.windows.cpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\baseline.windows.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\README.txt = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\README.txt
Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\run-test = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\run-test
Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\Speech_Simple.cntk = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\Speech_Simple.cntk
Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\Speech\Simple\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "NoQuantization", "NoQuantization", "{1BA5209D-3EB6-48E7-BE8A-0622315070C0}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{AA14A8DB-669D-447B-A97F-8B726BF30188}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\Data\SimpleDataTrain.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\Data\SimpleDataTrain.txt
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SinglePrecision", "SinglePrecision", "{CA248859-AA91-47D6-AC05-3542AB27E290}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.cpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.cpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\baseline.windows.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\run-test = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\run-test
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\SinglePrecision\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DoublePrecision", "DoublePrecision", "{8B6E9318-5ED0-49BF-945B-072E0D90A886}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.cpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.cpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.cpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.gpu.txt = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\baseline.windows.gpu.txt
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\run-test = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\run-test
Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\testcases.yml = Tests\EndToEndTests\CNTKTextFormatReader\ParallelTraining\NoQuantization\DoublePrecision\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SparseDSSM", "SparseDSSM", "{1FB54750-B668-4AC3-966F-ED504020AC06}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Text\SparseDSSM\baseline.cpu.txt = Tests\EndToEndTests\Text\SparseDSSM\baseline.cpu.txt
Tests\EndToEndTests\Text\SparseDSSM\baseline.gpu.txt = Tests\EndToEndTests\Text\SparseDSSM\baseline.gpu.txt
Tests\EndToEndTests\Text\SparseDSSM\baseline.windows.cpu.txt = Tests\EndToEndTests\Text\SparseDSSM\baseline.windows.cpu.txt
Tests\EndToEndTests\Text\SparseDSSM\baseline.windows.gpu.txt = Tests\EndToEndTests\Text\SparseDSSM\baseline.windows.gpu.txt
Tests\EndToEndTests\Text\SparseDSSM\dssm.cntk = Tests\EndToEndTests\Text\SparseDSSM\dssm.cntk
Tests\EndToEndTests\Text\SparseDSSM\dssm.ndl = Tests\EndToEndTests\Text\SparseDSSM\dssm.ndl
Tests\EndToEndTests\Text\SparseDSSM\run-test = Tests\EndToEndTests\Text\SparseDSSM\run-test
Tests\EndToEndTests\Text\SparseDSSM\testcases.yml = Tests\EndToEndTests\Text\SparseDSSM\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "WriteCommand", "WriteCommand", "{3E9BD61F-1F0A-4966-BE17-803AEFD1DFA4}"
ProjectSection(SolutionItems) = preProject
tests\endtoendtests\Speech\DNN\WriteCommand\baseline.cpu.txt = tests\endtoendtests\Speech\DNN\WriteCommand\baseline.cpu.txt
tests\endtoendtests\Speech\DNN\WriteCommand\baseline.gpu.txt = tests\endtoendtests\Speech\DNN\WriteCommand\baseline.gpu.txt
tests\endtoendtests\Speech\DNN\WriteCommand\baseline.windows.cpu.txt = tests\endtoendtests\Speech\DNN\WriteCommand\baseline.windows.cpu.txt
tests\endtoendtests\Speech\DNN\WriteCommand\baseline.windows.gpu.txt = tests\endtoendtests\Speech\DNN\WriteCommand\baseline.windows.gpu.txt
tests\endtoendtests\Speech\DNN\WriteCommand\cntk.cntk = tests\endtoendtests\Speech\DNN\WriteCommand\cntk.cntk
tests\endtoendtests\Speech\DNN\WriteCommand\Output.ScaledLogLikelihood.cpu = tests\endtoendtests\Speech\DNN\WriteCommand\Output.ScaledLogLikelihood.cpu
tests\endtoendtests\Speech\DNN\WriteCommand\Output.ScaledLogLikelihood.gpu = tests\endtoendtests\Speech\DNN\WriteCommand\Output.ScaledLogLikelihood.gpu
tests\endtoendtests\Speech\DNN\WriteCommand\Output.ScaledLogLikelihood.windows.cpu = tests\endtoendtests\Speech\DNN\WriteCommand\Output.ScaledLogLikelihood.windows.cpu
tests\endtoendtests\Speech\DNN\WriteCommand\Output.ScaledLogLikelihood.windows.gpu = tests\endtoendtests\Speech\DNN\WriteCommand\Output.ScaledLogLikelihood.windows.gpu
Tests\endtoendtests\Speech\DNN\WriteCommand\run-test = Tests\endtoendtests\Speech\DNN\WriteCommand\run-test
tests\endtoendtests\Speech\DNN\WriteCommand\testcases.yml = tests\endtoendtests\Speech\DNN\WriteCommand\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelBufferedAsyncGradientAggregation", "ParallelBufferedAsyncGradientAggregation", "{5560DDD4-1E6E-4F41-B9BD-F52A19DF0B31}"
ProjectSection(SolutionItems) = preProject
tests\endtoendtests\speech\dnn\ParallelBufferedAsyncGradientAggregation\baseline.cpu.txt = tests\endtoendtests\speech\dnn\ParallelBufferedAsyncGradientAggregation\baseline.cpu.txt
tests\endtoendtests\speech\dnn\ParallelBufferedAsyncGradientAggregation\baseline.gpu.txt = tests\endtoendtests\speech\dnn\ParallelBufferedAsyncGradientAggregation\baseline.gpu.txt
tests\endtoendtests\speech\dnn\ParallelBufferedAsyncGradientAggregation\baseline.windows.cpu.txt = tests\endtoendtests\speech\dnn\ParallelBufferedAsyncGradientAggregation\baseline.windows.cpu.txt
tests\endtoendtests\speech\dnn\ParallelBufferedAsyncGradientAggregation\baseline.windows.gpu.txt = tests\endtoendtests\speech\dnn\ParallelBufferedAsyncGradientAggregation\baseline.windows.gpu.txt
Tests\endtoendtests\Speech\DNN\ParallelBufferedAsyncGradientAggregation\run-test = Tests\endtoendtests\Speech\DNN\ParallelBufferedAsyncGradientAggregation\run-test
tests\endtoendtests\speech\dnn\ParallelBufferedAsyncGradientAggregation\testcases.yml = tests\endtoendtests\speech\dnn\ParallelBufferedAsyncGradientAggregation\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelCrossValidation", "ParallelCrossValidation", "{9834E864-A8CD-4D28-A3C9-F79FE0F421AE}"
ProjectSection(SolutionItems) = preProject
tests\endtoendtests\speech\dnn\ParallelCrossValidation\baseline.cpu.txt = tests\endtoendtests\speech\dnn\ParallelCrossValidation\baseline.cpu.txt
tests\endtoendtests\speech\dnn\ParallelCrossValidation\baseline.gpu.txt = tests\endtoendtests\speech\dnn\ParallelCrossValidation\baseline.gpu.txt
tests\endtoendtests\speech\dnn\ParallelCrossValidation\baseline.windows.cpu.txt = tests\endtoendtests\speech\dnn\ParallelCrossValidation\baseline.windows.cpu.txt
tests\endtoendtests\speech\dnn\ParallelCrossValidation\baseline.windows.gpu.txt = tests\endtoendtests\speech\dnn\ParallelCrossValidation\baseline.windows.gpu.txt
tests\endtoendtests\speech\dnn\ParallelCrossValidation\cntkcv.cntk = tests\endtoendtests\speech\dnn\ParallelCrossValidation\cntkcv.cntk
Tests\endtoendtests\Speech\DNN\ParallelCrossValidation\run-test = Tests\endtoendtests\Speech\DNN\ParallelCrossValidation\run-test
tests\endtoendtests\speech\dnn\ParallelCrossValidation\testcases.yml = tests\endtoendtests\speech\dnn\ParallelCrossValidation\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelNoQuantizationBufferedAsyncGradientAggregation", "ParallelNoQuantizationBufferedAsyncGradientAggregation", "{40F65441-A7B7-4425-8E75-CD74AB262F3F}"
ProjectSection(SolutionItems) = preProject
tests\endtoendtests\speech\dnn\ParallelNoQuantizationBufferedAsyncGradientAggregation\baseline.cpu.txt = tests\endtoendtests\speech\dnn\ParallelNoQuantizationBufferedAsyncGradientAggregation\baseline.cpu.txt
tests\endtoendtests\speech\dnn\ParallelNoQuantizationBufferedAsyncGradientAggregation\baseline.gpu.txt = tests\endtoendtests\speech\dnn\ParallelNoQuantizationBufferedAsyncGradientAggregation\baseline.gpu.txt
tests\endtoendtests\speech\dnn\ParallelNoQuantizationBufferedAsyncGradientAggregation\baseline.windows.cpu.txt = tests\endtoendtests\speech\dnn\ParallelNoQuantizationBufferedAsyncGradientAggregation\baseline.windows.cpu.txt
tests\endtoendtests\speech\dnn\ParallelNoQuantizationBufferedAsyncGradientAggregation\baseline.windows.gpu.txt = tests\endtoendtests\speech\dnn\ParallelNoQuantizationBufferedAsyncGradientAggregation\baseline.windows.gpu.txt
Tests\endtoendtests\Speech\DNN\ParallelNoQuantizationBufferedAsyncGradientAggregation\run-test = Tests\endtoendtests\Speech\DNN\ParallelNoQuantizationBufferedAsyncGradientAggregation\run-test
tests\endtoendtests\speech\dnn\ParallelNoQuantizationBufferedAsyncGradientAggregation\testcases.yml = tests\endtoendtests\speech\dnn\ParallelNoQuantizationBufferedAsyncGradientAggregation\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "PlotDNN", "PlotDNN", "{4D6F731C-4A6D-4E21-AC3C-9E1F26E5547E}"
ProjectSection(SolutionItems) = preProject
Tests\endtoendtests\Speech\DNN\PlotDNN\baseline.txt = Tests\endtoendtests\Speech\DNN\PlotDNN\baseline.txt
tests\endtoendtests\speech\dnn\PlotDNN\cntkSpeech.dnn.dot = tests\endtoendtests\speech\dnn\PlotDNN\cntkSpeech.dnn.dot
tests\endtoendtests\speech\dnn\PlotDNN\plot.cntk = tests\endtoendtests\speech\dnn\PlotDNN\plot.cntk
Tests\endtoendtests\Speech\DNN\PlotDNN\run-test = Tests\endtoendtests\Speech\DNN\PlotDNN\run-test
tests\endtoendtests\speech\dnn\PlotDNN\testcases.yml = tests\endtoendtests\speech\dnn\PlotDNN\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelBM", "ParallelBM", "{36C42845-0D48-4A46-9C67-2B593A80A09C}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Speech\DNN\ParallelBM\baseline.linux.cpu.txt = Tests\EndToEndTests\Speech\DNN\ParallelBM\baseline.linux.cpu.txt
Tests\EndToEndTests\Speech\DNN\ParallelBM\baseline.linux.gpu.txt = Tests\EndToEndTests\Speech\DNN\ParallelBM\baseline.linux.gpu.txt
Tests\EndToEndTests\Speech\DNN\ParallelBM\baseline.windows.cpu.txt = Tests\EndToEndTests\Speech\DNN\ParallelBM\baseline.windows.cpu.txt
Tests\EndToEndTests\Speech\DNN\ParallelBM\baseline.windows.gpu.txt = Tests\EndToEndTests\Speech\DNN\ParallelBM\baseline.windows.gpu.txt
Tests\EndToEndTests\Speech\DNN\ParallelBM\cntk.cntk = Tests\EndToEndTests\Speech\DNN\ParallelBM\cntk.cntk
Tests\EndToEndTests\Speech\DNN\ParallelBM\run-test = Tests\EndToEndTests\Speech\DNN\ParallelBM\run-test
Tests\EndToEndTests\Speech\DNN\ParallelBM\testcases.yml = Tests\EndToEndTests\Speech\DNN\ParallelBM\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SequenceToSequence", "SequenceToSequence", "{A1521DC4-C8EC-47BD-9E63-7BE30ED2EC26}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Miscellaneous", "Miscellaneous", "{85A05261-41D0-41DF-80B5-ADB6ABB54632}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "G2P", "G2P", "{4AD12278-9705-4BBA-B2C3-D6D5856AADC3}"
ProjectSection(SolutionItems) = preProject
Examples\SequenceToSequence\Miscellaneous\G2P\G2P.cntk = Examples\SequenceToSequence\Miscellaneous\G2P\G2P.cntk
Examples\SequenceToSequence\Miscellaneous\G2P\README.txt = Examples\SequenceToSequence\Miscellaneous\G2P\README.txt
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalClient", "Source\Extensibility\CPPEvalClient\CPPEvalClient.vcxproj", "{578D52A0-3928-4405-A016-F016E8B49031}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ExperimentalHtkmlfReader", "ExperimentalHtkmlfReader", "{977ECCB7-598D-4548-B95B-BACA9CC7D98B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DNN", "DNN", "{1DBB2575-F5C8-43F4-B982-D05D6ADC2F9B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "LSTM", "LSTM", "{772A0DB3-4710-4281-8AA9-A9F1F7C543D3}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "QuickE2E", "QuickE2E", "{FE3592CF-3EB9-4502-BB95-E2AB974C0FB5}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SVD", "SVD", "{BA6A65C5-92A2-4040-ADC3-0727A45694F6}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FullUtterance", "FullUtterance", "{3BDF52CD-7F3C-42BC-AB78-CF5BBC5F4AB4}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.cpu.txt = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.cpu.txt
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.gpu.txt = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.gpu.txt
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.windows.cpu.txt = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.windows.cpu.txt
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.windows.gpu.txt = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.windows.gpu.txt
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\cntk.cntk = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\cntk.cntk
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\run-test = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\run-test
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\testcases.yml = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Truncated", "Truncated", "{1141DC61-E014-4DEC-9157-F6B1FC055C7A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
@ -1206,6 +1499,22 @@ Global
{86883653-8A61-4038-81A0-2379FAE4200A}.Release_CpuOnly|x64.Build.0 = Release_CpuOnly|x64
{86883653-8A61-4038-81A0-2379FAE4200A}.Release|x64.ActiveCfg = Release|x64
{86883653-8A61-4038-81A0-2379FAE4200A}.Release|x64.Build.0 = Release|x64
{7B7A563D-AA8E-4660-A805-D50235A02120}.Debug_CpuOnly|x64.ActiveCfg = Debug_CpuOnly|x64
{7B7A563D-AA8E-4660-A805-D50235A02120}.Debug_CpuOnly|x64.Build.0 = Debug_CpuOnly|x64
{7B7A563D-AA8E-4660-A805-D50235A02120}.Debug|x64.ActiveCfg = Debug|x64
{7B7A563D-AA8E-4660-A805-D50235A02120}.Debug|x64.Build.0 = Debug|x64
{7B7A563D-AA8E-4660-A805-D50235A02120}.Release_CpuOnly|x64.ActiveCfg = Release_CpuOnly|x64
{7B7A563D-AA8E-4660-A805-D50235A02120}.Release_CpuOnly|x64.Build.0 = Release_CpuOnly|x64
{7B7A563D-AA8E-4660-A805-D50235A02120}.Release|x64.ActiveCfg = Release|x64
{7B7A563D-AA8E-4660-A805-D50235A02120}.Release|x64.Build.0 = Release|x64
{578D52A0-3928-4405-A016-F016E8B49031}.Debug_CpuOnly|x64.ActiveCfg = Debug_CpuOnly|x64
{578D52A0-3928-4405-A016-F016E8B49031}.Debug_CpuOnly|x64.Build.0 = Debug_CpuOnly|x64
{578D52A0-3928-4405-A016-F016E8B49031}.Debug|x64.ActiveCfg = Debug|x64
{578D52A0-3928-4405-A016-F016E8B49031}.Debug|x64.Build.0 = Debug|x64
{578D52A0-3928-4405-A016-F016E8B49031}.Release_CpuOnly|x64.ActiveCfg = Release_CpuOnly|x64
{578D52A0-3928-4405-A016-F016E8B49031}.Release_CpuOnly|x64.Build.0 = Release_CpuOnly|x64
{578D52A0-3928-4405-A016-F016E8B49031}.Release|x64.ActiveCfg = Release|x64
{578D52A0-3928-4405-A016-F016E8B49031}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@ -1337,6 +1646,53 @@ Global
{EC780385-7580-4D15-914B-1D878A295CBC} = {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}
{D11F76CC-DB6D-4CB4-B3B7-AB139DE2F5FA} = {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}
{181664AC-4C95-4798-A923-09B879215B33} = {8656B71D-E24C-4AC2-8BE4-C07B415A3E15}
{99FAAACE-C360-43CF-B706-20621F164484} = {6E565B48-1923-49CE-9787-9BBB9D96F4C5}
{629761D1-7A05-409A-B62B-FC1CCC0D6EED} = {99FAAACE-C360-43CF-B706-20621F164484}
{D4302516-C77F-4FAF-82FB-18DB39F5A53B} = {99FAAACE-C360-43CF-B706-20621F164484}
{06BE675D-80DD-419A-8E00-26953EF11F25} = {99FAAACE-C360-43CF-B706-20621F164484}
{5642F047-490B-4ABD-8113-8563C872B39F} = {99FAAACE-C360-43CF-B706-20621F164484}
{2B6CCAB6-A92A-483C-9FDB-8412FA4DC42F} = {629761D1-7A05-409A-B62B-FC1CCC0D6EED}
{225F5A3A-7CAF-4C71-9143-3AD2AC4D47A3} = {629761D1-7A05-409A-B62B-FC1CCC0D6EED}
{EBD36FD9-FE5B-420E-A572-DC6117300DB3} = {2B6CCAB6-A92A-483C-9FDB-8412FA4DC42F}
{08D284FA-2914-4B35-A89C-896DBA2B4484} = {2B6CCAB6-A92A-483C-9FDB-8412FA4DC42F}
{95FAC6A0-6AE7-4947-9DFD-498FE71311AD} = {08D284FA-2914-4B35-A89C-896DBA2B4484}
{A877E526-89C1-422E-9F90-4DDE84135A36} = {95FAC6A0-6AE7-4947-9DFD-498FE71311AD}
{071D8449-D080-4141-869D-600CC3C2A0BE} = {95FAC6A0-6AE7-4947-9DFD-498FE71311AD}
{D3A74C52-BC74-4DA3-BE93-8F4241D54EE0} = {95FAC6A0-6AE7-4947-9DFD-498FE71311AD}
{EC466625-BC66-41DF-B55A-EB28AFABE24E} = {95FAC6A0-6AE7-4947-9DFD-498FE71311AD}
{34D578DB-0101-45C4-9DF0-37DE9AB87C65} = {EBD36FD9-FE5B-420E-A572-DC6117300DB3}
{1FE04815-E02E-498C-B276-6D058D46D754} = {EBD36FD9-FE5B-420E-A572-DC6117300DB3}
{2A125ED5-9C8A-4BDF-A200-862104289608} = {EBD36FD9-FE5B-420E-A572-DC6117300DB3}
{E9207003-B860-4D57-B2CA-09AF52FF191F} = {EBD36FD9-FE5B-420E-A572-DC6117300DB3}
{50420947-E502-40B4-8739-2C0BADD93BEE} = {225F5A3A-7CAF-4C71-9143-3AD2AC4D47A3}
{935E5A95-888D-4922-AB5A-E9C11D65E974} = {50420947-E502-40B4-8739-2C0BADD93BEE}
{773313DD-69DD-463F-ADC9-E8A902A5223C} = {50420947-E502-40B4-8739-2C0BADD93BEE}
{C8E2EF3B-CCBF-4BDD-8127-2252626FB22B} = {50420947-E502-40B4-8739-2C0BADD93BEE}
{A4F79A83-DE30-40FA-88F4-86304C89AC7F} = {D4302516-C77F-4FAF-82FB-18DB39F5A53B}
{CC47AF62-2558-455F-81CB-36901AF033B0} = {5642F047-490B-4ABD-8113-8563C872B39F}
{1BA5209D-3EB6-48E7-BE8A-0622315070C0} = {06BE675D-80DD-419A-8E00-26953EF11F25}
{AA14A8DB-669D-447B-A97F-8B726BF30188} = {06BE675D-80DD-419A-8E00-26953EF11F25}
{CA248859-AA91-47D6-AC05-3542AB27E290} = {1BA5209D-3EB6-48E7-BE8A-0622315070C0}
{8B6E9318-5ED0-49BF-945B-072E0D90A886} = {1BA5209D-3EB6-48E7-BE8A-0622315070C0}
{86883653-8A61-4038-81A0-2379FAE4200A} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{7B7A563D-AA8E-4660-A805-D50235A02120} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{1FB54750-B668-4AC3-966F-ED504020AC06} = {8656B71D-E24C-4AC2-8BE4-C07B415A3E15}
{3E9BD61F-1F0A-4966-BE17-803AEFD1DFA4} = {6994C86D-A672-4254-824A-51F4DFEB807F}
{5560DDD4-1E6E-4F41-B9BD-F52A19DF0B31} = {6994C86D-A672-4254-824A-51F4DFEB807F}
{9834E864-A8CD-4D28-A3C9-F79FE0F421AE} = {6994C86D-A672-4254-824A-51F4DFEB807F}
{40F65441-A7B7-4425-8E75-CD74AB262F3F} = {6994C86D-A672-4254-824A-51F4DFEB807F}
{4D6F731C-4A6D-4E21-AC3C-9E1F26E5547E} = {6994C86D-A672-4254-824A-51F4DFEB807F}
{36C42845-0D48-4A46-9C67-2B593A80A09C} = {6994C86D-A672-4254-824A-51F4DFEB807F}
{A1521DC4-C8EC-47BD-9E63-7BE30ED2EC26} = {47755F2E-D674-4175-9E38-8EA053455072}
{85A05261-41D0-41DF-80B5-ADB6ABB54632} = {A1521DC4-C8EC-47BD-9E63-7BE30ED2EC26}
{4AD12278-9705-4BBA-B2C3-D6D5856AADC3} = {85A05261-41D0-41DF-80B5-ADB6ABB54632}
{578D52A0-3928-4405-A016-F016E8B49031} = {60F87E25-BC87-4782-8E20-1621AAEBB113}
{977ECCB7-598D-4548-B95B-BACA9CC7D98B} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
{1DBB2575-F5C8-43F4-B982-D05D6ADC2F9B} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
{772A0DB3-4710-4281-8AA9-A9F1F7C543D3} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
{FE3592CF-3EB9-4502-BB95-E2AB974C0FB5} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
{BA6A65C5-92A2-4040-ADC3-0727A45694F6} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
{3BDF52CD-7F3C-42BC-AB78-CF5BBC5F4AB4} = {772A0DB3-4710-4281-8AA9-A9F1F7C543D3}
{1141DC61-E014-4DEC-9157-F6B1FC055C7A} = {772A0DB3-4710-4281-8AA9-A9F1F7C543D3}
EndGlobalSection
EndGlobal

Просмотреть файл

@ -25,7 +25,8 @@ DNN = [
err = ErrorPrediction(labels, ol)
# Special Nodes
errTop5 = ErrorPrediction(labels, ol, Const(1), tag="eval")
# errTop1 can be used to compute, for example, top-5 error by changing Const(1) to Const(5).
errTop1 = ErrorPrediction(labels, ol, Const(1), tag="eval")
FeatureNodes = (features)
LabelNodes = (labels)
CriterionNodes = (ce)

Просмотреть файл

@ -26,7 +26,8 @@ ndlMacros = "$ConfigDir$/Macros.ndl"
traceLevel=1
numMBsToShowResult=500
prefetch=true
# Note: turn off prefetching; known to crash UCIFastReader occasionally.
prefetch=false
# If set to true, always initialize the network on CPU, making initialization consistent across CPU and GPU targets (for testing).
initOnCPUOnly=true

Просмотреть файл

@ -16,7 +16,8 @@ imageLayout = "cudnn"
# If set to true, always initialize the network on CPU, making initialization consistent across CPU and GPU targets (for testing).
initOnCPUOnly=true
prefetch = "true"
# Note: turn off prefetching; known to crash UCIFastReader occasionally.
prefetch = "false"
command = Train:Test

Просмотреть файл

@ -16,7 +16,8 @@ imageLayout = "cudnn"
# If set to true, always initialize the network on CPU, making initialization consistent across CPU and GPU targets (for testing).
initOnCPUOnly=true
prefetch = "true"
# Note: turn off prefetching; known to crash UCIFastReader occasionally.
prefetch = "false"
command = Train:Test

Просмотреть файл

@ -13,7 +13,8 @@ imageLayout = "cudnn"
# override the above as follows when running on CPU:
# deviceId = -1
prefetch = "true"
# Note: turn off prefetching; known to crash UCIFastReader occasionally.
prefetch = "false"
command = Train:Test

Просмотреть файл

@ -0,0 +1,21 @@
<?xml version="1.0"?>
<opencv_storage>
<EigVal type_id="opencv-matrix">
<rows>1</rows>
<cols>3</cols>
<dt>f</dt>
<data>
0.2175 0.0188 0.0045
</data>
</EigVal>
<EigVec type_id="opencv-matrix">
<rows>3</rows>
<cols>3</cols>
<dt>f</dt>
<data>
-0.5675 0.7192 0.4009
-0.5808 -0.0045 -0.8140
-0.5836 -0.6948 0.4203
</data>
</EigVec>
</opencv_storage>

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -73,12 +73,12 @@ ResNetNode2AInc(inp, outMap, inWCount, wCount, kW, kH, wScale, bValue, scValue,
]
# Standard building block for ResNet with padding (option B).
ResNetNode2BInc(inp, outMap, inMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, bnTimeConst)
ResNetNode2BInc(inp, outMap, inMap, inWCount, wCount, kW, kH, wScale, bValue, scValue, bnTimeConst, stride1x1, stride3x3)
[
# First convolution layer.
c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, 2, 2, wScale, bValue, scValue, bnTimeConst)
c1 = ConvBNReLULayer(inp, outMap, inWCount, kW, kH, stride1x1, stride1x1, wScale, bValue, scValue, bnTimeConst)
# Second convolution layer, no ReLU.
c2 = ConvBNLayer(c1, outMap, wCount, kW, kH, 1, 1, wScale, bValue, scValue, bnTimeConst)
c2 = ConvBNLayer(c1, outMap, wCount, kW, kH, stride3x3, stride3x3, wScale, bValue, scValue, bnTimeConst)
# Projection convolution layer.
c_proj = Conv1x1(inp, outMap, inMap, 2, 2, wScale, bValue, scValue, bnTimeConst)
@ -116,12 +116,12 @@ ResNetNode3AInc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue
y = RectifiedLinear(p)
]
ResNetNode3BInc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue, bnTimeConst, projStride)
ResNetNode3BInc(inp, inMap, convMap, outMap, convWCount, wScale, bValue, scValue, bnTimeConst, projStride, stride1x1, stride3x3)
[
# 1x1 reducing convolution.
c1 = Conv1x1ReLU(inp, convMap, inMap, projStride, projStride, wScale, bValue, scValue, bnTimeConst)
c1 = Conv1x1ReLU(inp, convMap, inMap, stride1x1, stride1x1, wScale, bValue, scValue, bnTimeConst)
# 3x3 convolution.
c2 = ConvBNReLULayer(c1, convMap, convWCount, 3, 3, 1, 1, wScale, bValue, scValue, bnTimeConst)
c2 = ConvBNReLULayer(c1, convMap, convWCount, 3, 3, stride3x3, stride3x3, wScale, bValue, scValue, bnTimeConst)
# 1x1 expanding convolution, no ReLU.
c3 = Conv1x1(c2, outMap, convMap, 1, 1, wScale, bValue, scValue, bnTimeConst)
# Input-to-output mapping convolution.

Просмотреть файл

@ -1,12 +1,22 @@
# CNTK example: ImageNet ResNet
**Disclaimer: network configurations and experiment settings in this folder try to follow those published in the [ResNet paper](http://arxiv.org/abs/1512.03385) as closely as possible. However, these samples are NOT endorsed or verified by the researchers who published the original work. It is NOT guaranteed that you get the same (or even close) results as those in the paper.**
## Overview
|Data: |The ILSVRC2012 dataset (http://www.image-net.org/challenges/LSVRC/2012/) of images.
|:---------|:---
|Purpose |This example demonstrates usage of the NDL (Network Description Language) to define networks similar to ResNet.
|Network |NDLNetworkBuilder, deep convolutional networks resembling ResNet networks.
|Network |NDLNetworkBuilder, deep convolutional residual networks (ResNet).
|Training |Stochastic gradient descent with momentum.
## Details
The network configurations and experiment settings in this folder resemble the ones in the original [ResNet paper](http://arxiv.org/abs/1512.03385) with a few minor changes inspired by [this work](https://github.com/facebook/fb.resnet.torch).
The following table contains results as well as links to pre-trained models that can be used in various applications.
| Network | Top-1 error | Top-5 error | Model
| ------------- | ----------- | ----------- | ----------
| ResNet-18 | 29.57 | 10.41 | [Download](https://www.cntk.ai/resnet/ResNet_18.model)
| ResNet-34 | 27.31 | 8.97 | [Download](https://www.cntk.ai/resnet/ResNet_34.model)
| ResNet-50 | 24.74 | 7.56 | [Download](https://www.cntk.ai/resnet/ResNet_50.model)
## Notes
This work is an implementation of ResNets in CNTK. If you are interested in the original implementation of ResNet, follow [this link](https://github.com/KaimingHe/deep-residual-networks).

Просмотреть файл

@ -18,10 +18,9 @@ stderr="$OutputDir$/ResNet_152"
traceLevel=1
numMBsToShowResult=500
Proj64to256Filename = "$ConfigDir$/64to256.txt"
Proj256to512Filename = "$ConfigDir$/256to512.txt"
Proj512to1024Filename = "$ConfigDir$/512to1024.txt"
Proj1024to2048Filename = "$ConfigDir$/1024to2048.txt"
# Strides for increasing layers. Defaults (paper) are 2 for 1x1 and 1 for 3x3.
stride1x1=1
stride3x3=2
Train=[
action="train"
@ -36,7 +35,7 @@ Train=[
minibatchSize=256
# Note that learning rates are 10x more than in the paper due to a different
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
learningRatesPerMB=1.0*35:0.1*35:0.01
learningRatesPerMB=1.0*30:0.1*30:0.01*30:0.001
momentumPerMB=0.9
maxEpochs=125
gradUpdateType="None"
@ -79,8 +78,21 @@ Train=[
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
interpolations="Linear"
# Stores mean values for each pixel in OpenCV matrix XML format.
interpolations="cubic"
# Aspect ratio jitter radius. Default is 0 (disabled).
aspectRatioRadius=0:0.2
# Brightness, contrast and color jittering. Default is 0 (disabled).
# Using 0 in the first epoch so the network can process original images.
brightnessRadius=0:0.2
contrastRadius=0:0.2
saturationRadius=0:0.4
# Intensity jittering: enabled if file is specified and intensityStdDev > 0.
# The file stores 1x3 vector (eigenvalues) and 3x3 matrix (eigenvectors) in OpenCV XML format.
intensityFile="$ConfigDir$/ImageNet1K_intensity.xml"
# StdDev for intensity jittering. Start from the second epoch.
intensityStdDev=0:0.1
# Mean subtraction: enabled if file is specified.
# The file stores mean values for each pixel in OpenCV matrix XML format.
meanFile="$ConfigDir$/ImageNet1K_mean.xml"
]
labels=[

Просмотреть файл

@ -47,11 +47,11 @@ DNN=[
pool1vs = 2
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hs, pool1vs, imageLayout = "cudnn")
rn1_1 = ResNetNode3BInc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, bnTimeConst, 1)
rn1_1 = ResNetNode3BInc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, bnTimeConst, 1, 1, 1)
rn1_2 = ResNetNode3A(rn1_1, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, bnTimeConst)
rn1_3 = ResNetNode3A(rn1_2, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, bnTimeConst)
rn2_1 = ResNetNode3BInc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst, 2)
rn2_1 = ResNetNode3BInc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst, 2, $stride1x1$, $stride3x3$)
rn2_2 = ResNetNode3A(rn2_1, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst)
rn2_3 = ResNetNode3A(rn2_2, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst)
rn2_4 = ResNetNode3A(rn2_3, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst)
@ -60,7 +60,7 @@ DNN=[
rn2_7 = ResNetNode3A(rn2_6, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst)
rn2_8 = ResNetNode3A(rn2_7, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst)
rn3_1 = ResNetNode3BInc(rn2_8, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst, 2)
rn3_1 = ResNetNode3BInc(rn2_8, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst, 2, $stride1x1$, $stride3x3$)
rn3_2 = ResNetNode3A(rn3_1, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst)
rn3_3 = ResNetNode3A(rn3_2, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst)
rn3_4 = ResNetNode3A(rn3_3, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst)
@ -97,7 +97,7 @@ DNN=[
rn3_35= ResNetNode3A(rn3_34, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst)
rn3_36= ResNetNode3A(rn3_35, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst)
rn4_1 = ResNetNode3BInc(rn3_36, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, bnTimeConst, 2)
rn4_1 = ResNetNode3BInc(rn3_36, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, bnTimeConst, 2, $stride1x1$, $stride3x3$)
rn4_2 = ResNetNode3A(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, bnTimeConst)
rn4_3 = ResNetNode3A(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, bnTimeConst)

Просмотреть файл

@ -18,10 +18,14 @@ stderr="$OutputDir$/ResNet_18"
traceLevel=1
numMBsToShowResult=500
# Strides for increasing layers. Defaults (paper) are 2 for 1x1 and 1 for 3x3.
stride1x1=1
stride3x3=2
Train=[
action="train"
modelPath="$ModelDir$/ResNet_18"
NDLNetworkBuilder=[
networkDescription="$ConfigDir$/ResNet_18.ndl"
]
@ -31,7 +35,7 @@ Train=[
minibatchSize=256
# Note that learning rates are 10x more than in the paper due to a different
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
learningRatesPerMB=1.0*35:0.1*35:0.01
learningRatesPerMB=1.0*30:0.1*30:0.01*30:0.001
momentumPerMB=0.9
maxEpochs=125
gradUpdateType="None"
@ -74,8 +78,21 @@ Train=[
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
interpolations="Linear"
# Stores mean values for each pixel in OpenCV matrix XML format.
interpolations="cubic"
# Aspect ratio jitter radius. Default is 0 (disabled).
aspectRatioRadius=0:0.2
# Brightness, contrast and color jittering. Default is 0 (disabled).
# Using 0 in the first epoch so the network can process original images.
brightnessRadius=0:0.2
contrastRadius=0:0.2
saturationRadius=0:0.4
# Intensity jittering: enabled if file is specified and intensityStdDev > 0.
# The file stores 1x3 vector (eigenvalues) and 3x3 matrix (eigenvectors) in OpenCV XML format.
intensityFile="$ConfigDir$/ImageNet1K_intensity.xml"
# StdDev for intensity jittering. Start from the second epoch.
intensityStdDev=0:0.1
# Mean subtraction: enabled if file is specified.
# The file stores mean values for each pixel in OpenCV matrix XML format.
meanFile="$ConfigDir$/ImageNet1K_mean.xml"
]
labels=[

Просмотреть файл

@ -45,15 +45,15 @@ DNN=[
rn1_2 = ResNetNode2A(rn1_1, cMap1, 576, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
cMap2 = 128
rn2_1 = ResNetNode2BInc(rn1_2, cMap2, cMap1, 576, 1152, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn2_1 = ResNetNode2BInc(rn1_2, cMap2, cMap1, 576, 1152, kW, kH, convWScale, convBValue, scValue, bnTimeConst, $stride1x1$, $stride3x3$)
rn2_2 = ResNetNode2A(rn2_1, cMap2, 1152, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
cMap3 = 256
rn3_1 = ResNetNode2BInc(rn2_2, cMap3, cMap2, 1152, 2304, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn3_1 = ResNetNode2BInc(rn2_2, cMap3, cMap2, 1152, 2304, kW, kH, convWScale, convBValue, scValue, bnTimeConst, $stride1x1$, $stride3x3$)
rn3_2 = ResNetNode2A(rn3_1, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
cMap4 = 512
rn4_1 = ResNetNode2BInc(rn3_2, cMap4, cMap3, 2304, 4608, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn4_1 = ResNetNode2BInc(rn3_2, cMap4, cMap3, 2304, 4608, kW, kH, convWScale, convBValue, scValue, bnTimeConst, $stride1x1$, $stride3x3$)
rn4_2 = ResNetNode2A(rn4_1, cMap4, 4608, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn4_3 = ResNetNode2A(rn4_2, cMap4, 4608, kW, kH, convWScale, convBValue, scValue, bnTimeConst)

Просмотреть файл

@ -18,9 +18,9 @@ stderr="$OutputDir$/ResNet_34"
traceLevel=1
numMBsToShowResult=500
Proj64to128Filename = "$ConfigDir$/64to128.txt"
Proj128to256Filename = "$ConfigDir$/128to256.txt"
Proj256to512Filename = "$ConfigDir$/256to512.txt"
# Strides for increasing layers. Defaults (paper) are 2 for 1x1 and 1 for 3x3.
stride1x1=1
stride3x3=2
Train=[
action="train"
@ -35,7 +35,7 @@ Train=[
minibatchSize=256
# Note that learning rates are 10x more than in the paper due to a different
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
learningRatesPerMB=1.0*35:0.1*35:0.01
learningRatesPerMB=1.0*30:0.1*30:0.01*30:0.001
momentumPerMB=0.9
maxEpochs=125
gradUpdateType="None"
@ -78,8 +78,21 @@ Train=[
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
interpolations="Linear"
# Stores mean values for each pixel in OpenCV matrix XML format.
interpolations="cubic"
# Aspect ratio jitter radius. Default is 0 (disabled).
aspectRatioRadius=0:0.2
# Brightness, contrast and color jittering. Default is 0 (disabled).
# Using 0 in the first epoch so the network can process original images.
brightnessRadius=0:0.2
contrastRadius=0:0.2
saturationRadius=0:0.4
# Intensity jittering: enabled if file is specified and intensityStdDev > 0.
# The file stores 1x3 vector (eigenvalues) and 3x3 matrix (eigenvectors) in OpenCV XML format.
intensityFile="$ConfigDir$/ImageNet1K_intensity.xml"
# StdDev for intensity jittering. Start from the second epoch.
intensityStdDev=0:0.1
# Mean subtraction: enabled if file is specified.
# The file stores mean values for each pixel in OpenCV matrix XML format.
meanFile="$ConfigDir$/ImageNet1K_mean.xml"
]
labels=[

Просмотреть файл

@ -46,13 +46,13 @@ DNN=[
rn1_3 = ResNetNode2A(rn1_2, cMap1, 576, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
cMap2 = 128
rn2_1 = ResNetNode2BInc(rn1_3, cMap2, cMap1, 576, 1152, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn2_1 = ResNetNode2BInc(rn1_3, cMap2, cMap1, 576, 1152, kW, kH, convWScale, convBValue, scValue, bnTimeConst, $stride1x1$, $stride3x3$)
rn2_2 = ResNetNode2A(rn2_1, cMap2, 1152, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn2_3 = ResNetNode2A(rn2_2, cMap2, 1152, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn2_4 = ResNetNode2A(rn2_3, cMap2, 1152, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
cMap3 = 256
rn3_1 = ResNetNode2BInc(rn2_4, cMap3, cMap2, 1152, 2304, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn3_1 = ResNetNode2BInc(rn2_4, cMap3, cMap2, 1152, 2304, kW, kH, convWScale, convBValue, scValue, bnTimeConst, $stride1x1$, $stride3x3$)
rn3_2 = ResNetNode2A(rn3_1, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn3_3 = ResNetNode2A(rn3_2, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn3_4 = ResNetNode2A(rn3_3, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
@ -60,7 +60,7 @@ DNN=[
rn3_6 = ResNetNode2A(rn3_5, cMap3, 2304, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
cMap4 = 512
rn4_1 = ResNetNode2BInc(rn3_6, cMap4, cMap3, 2304, 4608, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn4_1 = ResNetNode2BInc(rn3_6, cMap4, cMap3, 2304, 4608, kW, kH, convWScale, convBValue, scValue, bnTimeConst, $stride1x1$, $stride3x3$)
rn4_2 = ResNetNode2A(rn4_1, cMap4, 4608, kW, kH, convWScale, convBValue, scValue, bnTimeConst)
rn4_3 = ResNetNode2A(rn4_2, cMap4, 4608, kW, kH, convWScale, convBValue, scValue, bnTimeConst)

Просмотреть файл

@ -18,10 +18,9 @@ stderr="$OutputDir$/ResNet_50"
traceLevel=1
numMBsToShowResult=500
Proj64to256Filename = "$ConfigDir$/64to256.txt"
Proj256to512Filename = "$ConfigDir$/256to512.txt"
Proj512to1024Filename = "$ConfigDir$/512to1024.txt"
Proj1024to2048Filename = "$ConfigDir$/1024to2048.txt"
# Strides for increasing layers. Defaults (paper) are 2 for 1x1 and 1 for 3x3.
stride1x1=1
stride3x3=2
Train=[
action="train"
@ -36,7 +35,7 @@ Train=[
minibatchSize=256
# Note that learning rates are 10x more than in the paper due to a different
# momentum update rule in CNTK: v{t + 1} = lr*(1 - momentum)*g{t + 1} + momentum*v{t}
learningRatesPerMB=1.0*35:0.1*35:0.01
learningRatesPerMB=1.0*30:0.1*30:0.01*30:0.001
momentumPerMB=0.9
maxEpochs=125
gradUpdateType="None"
@ -79,8 +78,21 @@ Train=[
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
interpolations="Linear"
# Stores mean values for each pixel in OpenCV matrix XML format.
interpolations="cubic"
# Aspect ratio jitter radius. Default is 0 (disabled).
aspectRatioRadius=0:0.2
# Brightness, contrast and color jittering. Default is 0 (disabled).
# Using 0 in the first epoch so the network can process original images.
brightnessRadius=0:0.2
contrastRadius=0:0.2
saturationRadius=0:0.4
# Intensity jittering: enabled if file is specified and intensityStdDev > 0.
# The file stores 1x3 vector (eigenvalues) and 3x3 matrix (eigenvectors) in OpenCV XML format.
intensityFile="$ConfigDir$/ImageNet1K_intensity.xml"
# StdDev for intensity jittering. Start from the second epoch.
intensityStdDev=0:0.1
# Mean subtraction: enabled if file is specified.
# The file stores mean values for each pixel in OpenCV matrix XML format.
meanFile="$ConfigDir$/ImageNet1K_mean.xml"
]
labels=[

Просмотреть файл

@ -47,23 +47,23 @@ DNN=[
pool1vs = 2
pool1 = MaxNDPooling(conv1, pool1W, pool1H, pool1hs, pool1vs)
rn1_1 = ResNetNode3BInc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, bnTimeConst, 1)
rn1_1 = ResNetNode3BInc(pool1, cMap1, cMap1, cMap3, 576, convWScale, convBValue, scValue, bnTimeConst, 1, 1, 1)
rn1_2 = ResNetNode3A(rn1_1, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, bnTimeConst)
rn1_3 = ResNetNode3A(rn1_2, cMap3, cMap1, cMap3, 576, convWScale, convBValue, scValue, bnTimeConst)
rn2_1 = ResNetNode3BInc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst, 2)
rn2_1 = ResNetNode3BInc(rn1_3, cMap3, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst, 2, $stride1x1$, $stride3x3$)
rn2_2 = ResNetNode3A(rn2_1, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst)
rn2_3 = ResNetNode3A(rn2_2, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst)
rn2_4 = ResNetNode3A(rn2_3, cMap4, cMap2, cMap4, 1152, convWScale, convBValue, scValue, bnTimeConst)
rn3_1 = ResNetNode3BInc(rn2_4, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst, 2)
rn3_1 = ResNetNode3BInc(rn2_4, cMap4, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst, 2, $stride1x1$, $stride3x3$)
rn3_2 = ResNetNode3A(rn3_1, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst)
rn3_3 = ResNetNode3A(rn3_2, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst)
rn3_4 = ResNetNode3A(rn3_3, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst)
rn3_5 = ResNetNode3A(rn3_4, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst)
rn3_6 = ResNetNode3A(rn3_5, cMap5, cMap3, cMap5, 2304, convWScale, convBValue, scValue, bnTimeConst)
rn4_1 = ResNetNode3BInc(rn3_6, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, bnTimeConst, 2)
rn4_1 = ResNetNode3BInc(rn3_6, cMap5, cMap4, cMap6, 4608, convWScale, convBValue, scValue, bnTimeConst, 2, $stride1x1$, $stride3x3$)
rn4_2 = ResNetNode3A(rn4_1, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, bnTimeConst)
rn4_3 = ResNetNode3A(rn4_2, cMap6, cMap4, cMap6, 4608, convWScale, convBValue, scValue, bnTimeConst)

Просмотреть файл

@ -0,0 +1,603 @@
|labels 0 1 |features -1 -1
|labels 0 1 |features -1 -0.99
|labels 0 1 |features -1 -0.98
|labels 0 1 |features -1 -0.97
|labels 0 1 |features -1 -0.96
|labels 0 1 |features -1 -0.95
|labels 0 1 |features -1 -0.94
|labels 0 1 |features -1 -0.93
|labels 0 1 |features -1 -0.92
|labels 0 1 |features -1 -0.91
|labels 0 1 |features -1 -0.9
|labels 0 1 |features -1 -0.89
|labels 0 1 |features -1 -0.88
|labels 0 1 |features -1 -0.87
|labels 0 1 |features -1 -0.86
|labels 0 1 |features -1 -0.85
|labels 0 1 |features -1 -0.84
|labels 0 1 |features -1 -0.83
|labels 0 1 |features -1 -0.82
|labels 0 1 |features -1 -0.81
|labels 0 1 |features -1 -0.8
|labels 0 1 |features -1 -0.79
|labels 0 1 |features -1 -0.78
|labels 0 1 |features -1 -0.77
|labels 0 1 |features -1 -0.76
|labels 0 1 |features -1 -0.75
|labels 0 1 |features -1 -0.74
|labels 0 1 |features -1 -0.73
|labels 0 1 |features -1 -0.72
|labels 0 1 |features -1 -0.71
|labels 0 1 |features -1 -0.7
|labels 0 1 |features -1 -0.69
|labels 0 1 |features -1 -0.68
|labels 0 1 |features -1 -0.67
|labels 0 1 |features -1 -0.66
|labels 0 1 |features -1 -0.65
|labels 0 1 |features -1 -0.64
|labels 0 1 |features -1 -0.63
|labels 0 1 |features -1 -0.62
|labels 0 1 |features -1 -0.61
|labels 0 1 |features -1 -0.6
|labels 0 1 |features -1 -0.59
|labels 0 1 |features -1 -0.58
|labels 0 1 |features -1 -0.57
|labels 0 1 |features -1 -0.56
|labels 0 1 |features -1 -0.55
|labels 0 1 |features -1 -0.54
|labels 0 1 |features -1 -0.53
|labels 0 1 |features -1 -0.52
|labels 0 1 |features -1 -0.51
|labels 0 1 |features -1 -0.5
|labels 0 1 |features -1 -0.49
|labels 0 1 |features -1 -0.48
|labels 0 1 |features -1 -0.47
|labels 0 1 |features -1 -0.46
|labels 0 1 |features -1 -0.45
|labels 0 1 |features -1 -0.44
|labels 0 1 |features -1 -0.43
|labels 0 1 |features -1 -0.42
|labels 0 1 |features -1 -0.41
|labels 0 1 |features -1 -0.4
|labels 0 1 |features -1 -0.39
|labels 0 1 |features -1 -0.38
|labels 0 1 |features -1 -0.37
|labels 0 1 |features -1 -0.36
|labels 0 1 |features -1 -0.35
|labels 0 1 |features -1 -0.34
|labels 0 1 |features -1 -0.33
|labels 0 1 |features -1 -0.32
|labels 0 1 |features -1 -0.31
|labels 0 1 |features -1 -0.3
|labels 0 1 |features -1 -0.29
|labels 0 1 |features -1 -0.28
|labels 0 1 |features -1 -0.27
|labels 0 1 |features -1 -0.26
|labels 0 1 |features -1 -0.25
|labels 0 1 |features -1 -0.24
|labels 0 1 |features -1 -0.23
|labels 0 1 |features -1 -0.22
|labels 0 1 |features -1 -0.21
|labels 0 1 |features -1 -0.2
|labels 0 1 |features -1 -0.19
|labels 0 1 |features -1 -0.18
|labels 0 1 |features -1 -0.17
|labels 0 1 |features -1 -0.16
|labels 0 1 |features -1 -0.15
|labels 0 1 |features -1 -0.14
|labels 0 1 |features -1 -0.13
|labels 0 1 |features -1 -0.12
|labels 0 1 |features -1 -0.11
|labels 0 1 |features -1 -0.1
|labels 1 0 |features -1 -0.09
|labels 1 0 |features -1 -0.08
|labels 1 0 |features -1 -0.07
|labels 1 0 |features -1 -0.06
|labels 1 0 |features -1 -0.05
|labels 1 0 |features -1 -0.04
|labels 1 0 |features -1 -0.03
|labels 1 0 |features -1 -0.02
|labels 1 0 |features -1 -0.01
|labels 1 0 |features -1 0
|labels 1 0 |features -1 0.01
|labels 1 0 |features -1 0.02
|labels 1 0 |features -1 0.03
|labels 1 0 |features -1 0.04
|labels 1 0 |features -1 0.05
|labels 1 0 |features -1 0.06
|labels 1 0 |features -1 0.07
|labels 1 0 |features -1 0.08
|labels 1 0 |features -1 0.09
|labels 1 0 |features -1 0.1
|labels 1 0 |features -1 0.11
|labels 1 0 |features -1 0.12
|labels 1 0 |features -1 0.13
|labels 1 0 |features -1 0.14
|labels 1 0 |features -1 0.15
|labels 1 0 |features -1 0.16
|labels 1 0 |features -1 0.17
|labels 1 0 |features -1 0.18
|labels 1 0 |features -1 0.19
|labels 1 0 |features -1 0.2
|labels 1 0 |features -1 0.21
|labels 1 0 |features -1 0.22
|labels 1 0 |features -1 0.23
|labels 1 0 |features -1 0.24
|labels 1 0 |features -1 0.25
|labels 1 0 |features -1 0.26
|labels 1 0 |features -1 0.27
|labels 1 0 |features -1 0.28
|labels 1 0 |features -1 0.29
|labels 1 0 |features -1 0.3
|labels 1 0 |features -1 0.31
|labels 1 0 |features -1 0.32
|labels 1 0 |features -1 0.33
|labels 1 0 |features -1 0.34
|labels 1 0 |features -1 0.35
|labels 1 0 |features -1 0.36
|labels 1 0 |features -1 0.37
|labels 1 0 |features -1 0.38
|labels 1 0 |features -1 0.39
|labels 1 0 |features -1 0.4
|labels 1 0 |features -1 0.41
|labels 1 0 |features -1 0.42
|labels 1 0 |features -1 0.43
|labels 1 0 |features -1 0.44
|labels 1 0 |features -1 0.45
|labels 1 0 |features -1 0.46
|labels 1 0 |features -1 0.47
|labels 1 0 |features -1 0.48
|labels 1 0 |features -1 0.49
|labels 1 0 |features -1 0.5
|labels 1 0 |features -1 0.51
|labels 1 0 |features -1 0.52
|labels 1 0 |features -1 0.53
|labels 1 0 |features -1 0.54
|labels 1 0 |features -1 0.55
|labels 1 0 |features -1 0.56
|labels 1 0 |features -1 0.57
|labels 1 0 |features -1 0.58
|labels 1 0 |features -1 0.59
|labels 1 0 |features -1 0.6
|labels 1 0 |features -1 0.61
|labels 1 0 |features -1 0.62
|labels 1 0 |features -1 0.63
|labels 1 0 |features -1 0.64
|labels 1 0 |features -1 0.65
|labels 1 0 |features -1 0.66
|labels 1 0 |features -1 0.67
|labels 1 0 |features -1 0.68
|labels 1 0 |features -1 0.69
|labels 1 0 |features -1 0.7
|labels 1 0 |features -1 0.71
|labels 1 0 |features -1 0.72
|labels 1 0 |features -1 0.73
|labels 1 0 |features -1 0.74
|labels 1 0 |features -1 0.75
|labels 1 0 |features -1 0.76
|labels 1 0 |features -1 0.77
|labels 1 0 |features -1 0.78
|labels 1 0 |features -1 0.79
|labels 1 0 |features -1 0.8
|labels 1 0 |features -1 0.81
|labels 1 0 |features -1 0.82
|labels 1 0 |features -1 0.83
|labels 1 0 |features -1 0.84
|labels 1 0 |features -1 0.85
|labels 1 0 |features -1 0.86
|labels 1 0 |features -1 0.87
|labels 1 0 |features -1 0.88
|labels 1 0 |features -1 0.89
|labels 1 0 |features -1 0.9
|labels 1 0 |features -1 0.91
|labels 1 0 |features -1 0.92
|labels 1 0 |features -1 0.93
|labels 1 0 |features -1 0.94
|labels 1 0 |features -1 0.95
|labels 1 0 |features -1 0.96
|labels 1 0 |features -1 0.97
|labels 1 0 |features -1 0.98
|labels 1 0 |features -1 0.99
|labels 1 0 |features -1 0
|labels 0 1 |features 0 -1
|labels 0 1 |features 0 -0.99
|labels 0 1 |features 0 -0.98
|labels 0 1 |features 0 -0.97
|labels 0 1 |features 0 -0.96
|labels 0 1 |features 0 -0.95
|labels 0 1 |features 0 -0.94
|labels 0 1 |features 0 -0.93
|labels 0 1 |features 0 -0.92
|labels 0 1 |features 0 -0.91
|labels 0 1 |features 0 -0.9
|labels 0 1 |features 0 -0.89
|labels 0 1 |features 0 -0.88
|labels 0 1 |features 0 -0.87
|labels 0 1 |features 0 -0.86
|labels 0 1 |features 0 -0.85
|labels 0 1 |features 0 -0.84
|labels 0 1 |features 0 -0.83
|labels 0 1 |features 0 -0.82
|labels 0 1 |features 0 -0.81
|labels 0 1 |features 0 -0.8
|labels 0 1 |features 0 -0.79
|labels 0 1 |features 0 -0.78
|labels 0 1 |features 0 -0.77
|labels 0 1 |features 0 -0.76
|labels 0 1 |features 0 -0.75
|labels 0 1 |features 0 -0.74
|labels 0 1 |features 0 -0.73
|labels 0 1 |features 0 -0.72
|labels 0 1 |features 0 -0.71
|labels 0 1 |features 0 -0.7
|labels 0 1 |features 0 -0.69
|labels 0 1 |features 0 -0.68
|labels 0 1 |features 0 -0.67
|labels 0 1 |features 0 -0.66
|labels 0 1 |features 0 -0.65
|labels 0 1 |features 0 -0.64
|labels 0 1 |features 0 -0.63
|labels 0 1 |features 0 -0.62
|labels 0 1 |features 0 -0.61
|labels 0 1 |features 0 -0.6
|labels 0 1 |features 0 -0.59
|labels 0 1 |features 0 -0.58
|labels 0 1 |features 0 -0.57
|labels 0 1 |features 0 -0.56
|labels 0 1 |features 0 -0.55
|labels 0 1 |features 0 -0.54
|labels 0 1 |features 0 -0.53
|labels 0 1 |features 0 -0.52
|labels 0 1 |features 0 -0.51
|labels 0 1 |features 0 -0.5
|labels 0 1 |features 0 -0.49
|labels 0 1 |features 0 -0.48
|labels 0 1 |features 0 -0.47
|labels 0 1 |features 0 -0.46
|labels 0 1 |features 0 -0.45
|labels 0 1 |features 0 -0.44
|labels 0 1 |features 0 -0.43
|labels 0 1 |features 0 -0.42
|labels 0 1 |features 0 -0.41
|labels 0 1 |features 0 -0.4
|labels 0 1 |features 0 -0.39
|labels 0 1 |features 0 -0.38
|labels 0 1 |features 0 -0.37
|labels 0 1 |features 0 -0.36
|labels 0 1 |features 0 -0.35
|labels 0 1 |features 0 -0.34
|labels 0 1 |features 0 -0.33
|labels 0 1 |features 0 -0.32
|labels 0 1 |features 0 -0.31
|labels 0 1 |features 0 -0.3
|labels 0 1 |features 0 -0.29
|labels 0 1 |features 0 -0.28
|labels 0 1 |features 0 -0.27
|labels 0 1 |features 0 -0.26
|labels 0 1 |features 0 -0.25
|labels 0 1 |features 0 -0.24
|labels 0 1 |features 0 -0.23
|labels 0 1 |features 0 -0.22
|labels 0 1 |features 0 -0.21
|labels 0 1 |features 0 -0.2
|labels 0 1 |features 0 -0.19
|labels 0 1 |features 0 -0.18
|labels 0 1 |features 0 -0.17
|labels 0 1 |features 0 -0.16
|labels 0 1 |features 0 -0.15
|labels 0 1 |features 0 -0.14
|labels 0 1 |features 0 -0.13
|labels 0 1 |features 0 -0.12
|labels 0 1 |features 0 -0.11
|labels 0 1 |features 0 -0.1
|labels 1 0 |features 0 -0.09
|labels 1 0 |features 0 -0.08
|labels 1 0 |features 0 -0.07
|labels 1 0 |features 0 -0.06
|labels 1 0 |features 0 -0.05
|labels 1 0 |features 0 -0.04
|labels 1 0 |features 0 -0.03
|labels 1 0 |features 0 -0.02
|labels 1 0 |features 0 -0.01
|labels 1 0 |features 0 0
|labels 1 0 |features 0 0.01
|labels 1 0 |features 0 0.02
|labels 1 0 |features 0 0.03
|labels 1 0 |features 0 0.04
|labels 1 0 |features 0 0.05
|labels 1 0 |features 0 0.06
|labels 1 0 |features 0 0.07
|labels 1 0 |features 0 0.08
|labels 1 0 |features 0 0.09
|labels 1 0 |features 0 0.1
|labels 1 0 |features 0 0.11
|labels 1 0 |features 0 0.12
|labels 1 0 |features 0 0.13
|labels 1 0 |features 0 0.14
|labels 1 0 |features 0 0.15
|labels 1 0 |features 0 0.16
|labels 1 0 |features 0 0.17
|labels 1 0 |features 0 0.18
|labels 1 0 |features 0 0.19
|labels 1 0 |features 0 0.2
|labels 1 0 |features 0 0.21
|labels 1 0 |features 0 0.22
|labels 1 0 |features 0 0.23
|labels 1 0 |features 0 0.24
|labels 1 0 |features 0 0.25
|labels 1 0 |features 0 0.26
|labels 1 0 |features 0 0.27
|labels 1 0 |features 0 0.28
|labels 1 0 |features 0 0.29
|labels 1 0 |features 0 0.3
|labels 1 0 |features 0 0.31
|labels 1 0 |features 0 0.32
|labels 1 0 |features 0 0.33
|labels 1 0 |features 0 0.34
|labels 1 0 |features 0 0.35
|labels 1 0 |features 0 0.36
|labels 1 0 |features 0 0.37
|labels 1 0 |features 0 0.38
|labels 1 0 |features 0 0.39
|labels 1 0 |features 0 0.4
|labels 1 0 |features 0 0.41
|labels 1 0 |features 0 0.42
|labels 1 0 |features 0 0.43
|labels 1 0 |features 0 0.44
|labels 1 0 |features 0 0.45
|labels 1 0 |features 0 0.46
|labels 1 0 |features 0 0.47
|labels 1 0 |features 0 0.48
|labels 1 0 |features 0 0.49
|labels 1 0 |features 0 0.5
|labels 1 0 |features 0 0.51
|labels 1 0 |features 0 0.52
|labels 1 0 |features 0 0.53
|labels 1 0 |features 0 0.54
|labels 1 0 |features 0 0.55
|labels 1 0 |features 0 0.56
|labels 1 0 |features 0 0.57
|labels 1 0 |features 0 0.58
|labels 1 0 |features 0 0.59
|labels 1 0 |features 0 0.6
|labels 1 0 |features 0 0.61
|labels 1 0 |features 0 0.62
|labels 1 0 |features 0 0.63
|labels 1 0 |features 0 0.64
|labels 1 0 |features 0 0.65
|labels 1 0 |features 0 0.66
|labels 1 0 |features 0 0.67
|labels 1 0 |features 0 0.68
|labels 1 0 |features 0 0.69
|labels 1 0 |features 0 0.7
|labels 1 0 |features 0 0.71
|labels 1 0 |features 0 0.72
|labels 1 0 |features 0 0.73
|labels 1 0 |features 0 0.74
|labels 1 0 |features 0 0.75
|labels 1 0 |features 0 0.76
|labels 1 0 |features 0 0.77
|labels 1 0 |features 0 0.78
|labels 1 0 |features 0 0.79
|labels 1 0 |features 0 0.8
|labels 1 0 |features 0 0.81
|labels 1 0 |features 0 0.82
|labels 1 0 |features 0 0.83
|labels 1 0 |features 0 0.84
|labels 1 0 |features 0 0.85
|labels 1 0 |features 0 0.86
|labels 1 0 |features 0 0.87
|labels 1 0 |features 0 0.88
|labels 1 0 |features 0 0.89
|labels 1 0 |features 0 0.9
|labels 1 0 |features 0 0.91
|labels 1 0 |features 0 0.92
|labels 1 0 |features 0 0.93
|labels 1 0 |features 0 0.94
|labels 1 0 |features 0 0.95
|labels 1 0 |features 0 0.96
|labels 1 0 |features 0 0.97
|labels 1 0 |features 0 0.98
|labels 1 0 |features 0 0.99
|labels 1 0 |features 0 1
|labels 0 1 |features 1 -1
|labels 0 1 |features 1 -0.99
|labels 0 1 |features 1 -0.98
|labels 0 1 |features 1 -0.97
|labels 0 1 |features 1 -0.96
|labels 0 1 |features 1 -0.95
|labels 0 1 |features 1 -0.94
|labels 0 1 |features 1 -0.93
|labels 0 1 |features 1 -0.92
|labels 0 1 |features 1 -0.91
|labels 0 1 |features 1 -0.9
|labels 0 1 |features 1 -0.89
|labels 0 1 |features 1 -0.88
|labels 0 1 |features 1 -0.87
|labels 0 1 |features 1 -0.86
|labels 0 1 |features 1 -0.85
|labels 0 1 |features 1 -0.84
|labels 0 1 |features 1 -0.83
|labels 0 1 |features 1 -0.82
|labels 0 1 |features 1 -0.81
|labels 0 1 |features 1 -0.8
|labels 0 1 |features 1 -0.79
|labels 0 1 |features 1 -0.78
|labels 0 1 |features 1 -0.77
|labels 0 1 |features 1 -0.76
|labels 0 1 |features 1 -0.75
|labels 0 1 |features 1 -0.74
|labels 0 1 |features 1 -0.73
|labels 0 1 |features 1 -0.72
|labels 0 1 |features 1 -0.71
|labels 0 1 |features 1 -0.7
|labels 0 1 |features 1 -0.69
|labels 0 1 |features 1 -0.68
|labels 0 1 |features 1 -0.67
|labels 0 1 |features 1 -0.66
|labels 0 1 |features 1 -0.65
|labels 0 1 |features 1 -0.64
|labels 0 1 |features 1 -0.63
|labels 0 1 |features 1 -0.62
|labels 0 1 |features 1 -0.61
|labels 0 1 |features 1 -0.6
|labels 0 1 |features 1 -0.59
|labels 0 1 |features 1 -0.58
|labels 0 1 |features 1 -0.57
|labels 0 1 |features 1 -0.56
|labels 0 1 |features 1 -0.55
|labels 0 1 |features 1 -0.54
|labels 0 1 |features 1 -0.53
|labels 0 1 |features 1 -0.52
|labels 0 1 |features 1 -0.51
|labels 0 1 |features 1 -0.5
|labels 0 1 |features 1 -0.49
|labels 0 1 |features 1 -0.48
|labels 0 1 |features 1 -0.47
|labels 0 1 |features 1 -0.46
|labels 0 1 |features 1 -0.45
|labels 0 1 |features 1 -0.44
|labels 0 1 |features 1 -0.43
|labels 0 1 |features 1 -0.42
|labels 0 1 |features 1 -0.41
|labels 0 1 |features 1 -0.4
|labels 0 1 |features 1 -0.39
|labels 0 1 |features 1 -0.38
|labels 0 1 |features 1 -0.37
|labels 0 1 |features 1 -0.36
|labels 0 1 |features 1 -0.35
|labels 0 1 |features 1 -0.34
|labels 0 1 |features 1 -0.33
|labels 0 1 |features 1 -0.32
|labels 0 1 |features 1 -0.31
|labels 0 1 |features 1 -0.3
|labels 0 1 |features 1 -0.29
|labels 0 1 |features 1 -0.28
|labels 0 1 |features 1 -0.27
|labels 0 1 |features 1 -0.26
|labels 0 1 |features 1 -0.25
|labels 0 1 |features 1 -0.24
|labels 0 1 |features 1 -0.23
|labels 0 1 |features 1 -0.22
|labels 0 1 |features 1 -0.21
|labels 0 1 |features 1 -0.2
|labels 0 1 |features 1 -0.19
|labels 0 1 |features 1 -0.18
|labels 0 1 |features 1 -0.17
|labels 0 1 |features 1 -0.16
|labels 0 1 |features 1 -0.15
|labels 0 1 |features 1 -0.14
|labels 0 1 |features 1 -0.13
|labels 0 1 |features 1 -0.12
|labels 0 1 |features 1 -0.11
|labels 0 1 |features 1 -0.1
|labels 0 1 |features 1 -0.09
|labels 0 1 |features 1 -0.08
|labels 0 1 |features 1 -0.07
|labels 0 1 |features 1 -0.06
|labels 0 1 |features 1 -0.05
|labels 0 1 |features 1 -0.04
|labels 0 1 |features 1 -0.03
|labels 0 1 |features 1 -0.02
|labels 0 1 |features 1 -0.01
|labels 1 0 |features 1 0
|labels 1 0 |features 1 0.01
|labels 1 0 |features 1 0.02
|labels 1 0 |features 1 0.03
|labels 1 0 |features 1 0.04
|labels 1 0 |features 1 0.05
|labels 1 0 |features 1 0.06
|labels 1 0 |features 1 0.07
|labels 1 0 |features 1 0.08
|labels 1 0 |features 1 0.09
|labels 1 0 |features 1 0.1
|labels 1 0 |features 1 0.11
|labels 1 0 |features 1 0.12
|labels 1 0 |features 1 0.13
|labels 1 0 |features 1 0.14
|labels 1 0 |features 1 0.15
|labels 1 0 |features 1 0.16
|labels 1 0 |features 1 0.17
|labels 1 0 |features 1 0.18
|labels 1 0 |features 1 0.19
|labels 1 0 |features 1 0.2
|labels 1 0 |features 1 0.21
|labels 1 0 |features 1 0.22
|labels 1 0 |features 1 0.23
|labels 1 0 |features 1 0.24
|labels 1 0 |features 1 0.25
|labels 1 0 |features 1 0.26
|labels 1 0 |features 1 0.27
|labels 1 0 |features 1 0.28
|labels 1 0 |features 1 0.29
|labels 1 0 |features 1 0.3
|labels 1 0 |features 1 0.31
|labels 1 0 |features 1 0.32
|labels 1 0 |features 1 0.33
|labels 1 0 |features 1 0.34
|labels 1 0 |features 1 0.35
|labels 1 0 |features 1 0.36
|labels 1 0 |features 1 0.37
|labels 1 0 |features 1 0.38
|labels 1 0 |features 1 0.39
|labels 1 0 |features 1 0.4
|labels 1 0 |features 1 0.41
|labels 1 0 |features 1 0.42
|labels 1 0 |features 1 0.43
|labels 1 0 |features 1 0.44
|labels 1 0 |features 1 0.45
|labels 1 0 |features 1 0.46
|labels 1 0 |features 1 0.47
|labels 1 0 |features 1 0.48
|labels 1 0 |features 1 0.49
|labels 1 0 |features 1 0.5
|labels 1 0 |features 1 0.51
|labels 1 0 |features 1 0.52
|labels 1 0 |features 1 0.53
|labels 1 0 |features 1 0.54
|labels 1 0 |features 1 0.55
|labels 1 0 |features 1 0.56
|labels 1 0 |features 1 0.57
|labels 1 0 |features 1 0.58
|labels 1 0 |features 1 0.59
|labels 1 0 |features 1 0.6
|labels 1 0 |features 1 0.61
|labels 1 0 |features 1 0.62
|labels 1 0 |features 1 0.63
|labels 1 0 |features 1 0.64
|labels 1 0 |features 1 0.65
|labels 1 0 |features 1 0.66
|labels 1 0 |features 1 0.67
|labels 1 0 |features 1 0.68
|labels 1 0 |features 1 0.69
|labels 1 0 |features 1 0.7
|labels 1 0 |features 1 0.71
|labels 1 0 |features 1 0.72
|labels 1 0 |features 1 0.73
|labels 1 0 |features 1 0.74
|labels 1 0 |features 1 0.75
|labels 1 0 |features 1 0.76
|labels 1 0 |features 1 0.77
|labels 1 0 |features 1 0.78
|labels 1 0 |features 1 0.79
|labels 1 0 |features 1 0.8
|labels 1 0 |features 1 0.81
|labels 1 0 |features 1 0.82
|labels 1 0 |features 1 0.83
|labels 1 0 |features 1 0.84
|labels 1 0 |features 1 0.85
|labels 1 0 |features 1 0.86
|labels 1 0 |features 1 0.87
|labels 1 0 |features 1 0.88
|labels 1 0 |features 1 0.89
|labels 1 0 |features 1 0.9
|labels 1 0 |features 1 0.91
|labels 1 0 |features 1 0.92
|labels 1 0 |features 1 0.93
|labels 1 0 |features 1 0.94
|labels 1 0 |features 1 0.95
|labels 1 0 |features 1 0.96
|labels 1 0 |features 1 0.97
|labels 1 0 |features 1 0.98
|labels 1 0 |features 1 0.99
|labels 1 0 |features 1 1

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,491 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
############################################################################
# G2P.cntk #
# #
# Example for sequence-to-sequence modeling for grapheme-to-phoneme #
# (aka letter-to-sound) conversion on the CMUDict #
############################################################################
# directory defaults (if not overridden)
# G2P (grapheme-to-phoneme) sequence-to-sequence example: top-level configuration.
# All paths are derived from RunRootDir, which is typically overridden on the command line.
RunRootDir = "../.." # default if not overridden
DataDir = "$RunRootDir$/Data"
CacheDir = "$DataDir$/cache" # (not used currently)
ExpRootDir = "$RunRootDir$"
# command to execute
command = train
#command = write
#command = dump
makeMode = false # false = do not skip the command even if its outputs exist (NOTE(review): presumably disables make-style resume — confirm)
# experiment id
deviceId = 0 # set the GPU device here, or "auto" to auto-select; or override from the command line.
ExpId = g2p-1-$deviceId$ # choose a meaningful id here. This is used for unique directory and filenames.
#ExpId = g2p-1-0 # change to different id when decoding a different model
# directories
ExpDir = "$ExpRootDir$/$ExpId$"
ModelDir = "$ExpDir$/Models"
stderr = $ExpDir$/G2P # prefix for the log file(s)
precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/G2P.dnn"
# decoding config --used by the "write" command ("write" decodes and writes the result)
beamDepth = 3 # 0=predict; 1=greedy; >1=beam
decodeModel = 9 # epoch number of the checkpoint to decode
decodeModelPath = "$modelPath$.$decodeModel$" # note: epoch to decode is appended to the model path
decodeOutputPath = "$decodeModelPath$.$beamDepth$" # results are written next to the model, with beamDepth appended
# dump config --used by the "dump" command, for inspecting the model parameters
dumpModelPath = "$modelPath$.2" # put the epoch id here
# top-level model configuration
hiddenDim = 512 # hidden dimension of encoder/decoder LSTM layers
maxLayer = 2 # highest hidden-layer index (see network definition: e.g. 2 for 3 hidden layers)
isBidirectional = false # true = bi-directional LSTM encoder
# comment/uncomment this or the next block to switch between readers
# Note: Currently this configuration cannot reach the same result with CNTKTextFormatReader.
# This is being investigated. For now, please use the LMSequenceReader.
# --- begin uncomment for LMSequenceReader ---
readerType = "LMSequenceReader"
useCNTKTextFormatReader = false
inputVocabSize = 69
labelVocabSize = 69
shareEmbeddings = true
fileExt = "joint"
# --- end uncomment ---
# --- begin uncomment for CNTKTextFormatReader ---
#readerType = "CNTKTextFormatReader"
#useCNTKTextFormatReader = true
#inputVocabSize = 29 # 26 letters plus start, end, apostrophe
#labelVocabSize = 41 # 39 phonemes (~AX missing), plus start and end symbol (in index 0)
#shareEmbeddings = false
#fileExt = "ctf"
# --- end uncomment ---
# corpus
maxLength = 20 # 0 disables attention
isAutoEncoder=false # true = single input sequence that reproduces itself (see network definition)
startSymbol = "<s>"
trainFile = "g014b2b.train-dev-20-21.bsf.$fileExt$"
validFile = "g014b2b.train-dev-1-21.bsf.$fileExt$"
testFile = "g014b2b.test.bsf.$fileExt$"
vocabFile = "g014b2b.wl"
# some reader variables that occur multiple times
# (shared between the train, cvReader, and write reader sections below)
cntkReaderInputDef = [ rawInput = [ alias = "s" ; dim = $inputVocabSize$ ; format = "sparse" ] ; rawLabels = [ alias = "t" ; dim = $labelVocabSize$ ; format = "sparse" ] ]
lmSequenceReaderInputDef = [ dim = 0 ]
lmSequenceReaderInputLabelsDef = [ dim = 1 ; labelType = "category" ; labelDim = "$inputVocabSize$" ; labelMappingFile = "$DataDir$/$vocabFile$" ; beginSequence = "</s>" ; endSequence = "</s>" ]
#######################################
# network definition #
#######################################
BrainScriptNetworkBuilder = (new ComputationNetwork [
# Sequence-to-sequence network: LSTM encoder + LSTM decoder with softmax output.
# The encoder state is handed to the decoder in one of three ways (selected below):
# as initial state, appended to every frame (NYU style), or via attention.
# import general config options from outside config values
useCNTKTextFormatReader = $useCNTKTextFormatReader$
inputVocabDim = $inputVocabSize$
labelVocabDim = $labelVocabSize$
isAutoencoder = $isAutoEncoder$ # input is only one sequence, meant to reproduce itself
attentionSpan = $maxLength$ # attention window, must be large enough for largest input sequence. 0 to disable. Exactly 20 is needed for the g2p CMUDict task
useBidirectionalEncoder = $isBidirectional$ # bi-directional LSTM for encoder
shareEmbeddings = $shareEmbeddings$
hiddenDim = $hiddenDim$
attentionDim = 128 # dim of attention projection
maxLayer = $maxLayer$ # e.g. 2 for 3 hidden layers
useStabilizer = true
useEncoder = true # if false, this becomes a regular RNN
useNYUStyle = false # if true use thought vector for all inputs, NYU-style
# dimensions
embeddingDim = 300
# embedding is skipped (identity) for small vocabularies; see EmbedInput/EmbedLabels below
inputEmbeddingDim = if inputVocabDim < 300 then inputVocabDim else embeddingDim
labelEmbeddingDim = if labelVocabDim < 300 then labelVocabDim else embeddingDim
encoderDims[i:0..maxLayer] = hiddenDim # this defines the number of hidden layers in each
decoderDims[i:0..maxLayer] = hiddenDim # both are one LSTM layer only for now
#############################################################
# inputs
#############################################################
# inputs and axes must be defined on top-scope level in order to get a clean node name from BrainScript.
inputAxis = DynamicAxis()
rawInput = if useCNTKTextFormatReader && !isAutoencoder
then Input (inputVocabDim, dynamicAxis=inputAxis, tag='feature')
else Input (inputVocabDim, tag='feature')
rawLabels = if useCNTKTextFormatReader && !isAutoencoder
then Input (labelVocabDim, tag='label')
else rawInput
# get out input and label data
# Specifically, if the input and label is on a single line, we must split it in two.
streams = [
out = if isAutoencoder || useCNTKTextFormatReader then [
input = TraceSparse (rawInput, 'inp')
labels = TraceSparse (rawLabels, 'lbl')
]
else [
# LMSequenceReader case: input and labels arrive as one joint sequence,
# separated by the token in row 'separatorRow'; split via recurrent flags.
separatorRow = 2 # row index of separator symbol
isSeparator = RowSlice (separatorRow, 1, rawInput) # cut out the separator as a flag
inInput = BS.Boolean.Or (FutureValue (1, inInput , defaultHiddenActivation=0), isSeparator) # flag sequence: word is input...
inLabels = BS.Boolean.Or (PastValue (1, inLabels, defaultHiddenActivation=0), isSeparator) # ...or labels
input = BS.Sequences.Gather (inInput, rawInput) # use flags to split raw input into input and labels
labels = BS.Sequences.Gather (inLabels, rawInput) # (both have different lengths)
]
].out
# inputs and labels are expected to be surrounded by sentence delimiters, e.g. <s> A B C </s> ==> <s> D E F </s>
# The encoder uses all tokens of 'input', while for the target labels we must exclude the initial sentence start, which is only used as the LM history.
inputSequence = Pass (streams.input) # e.g. <s> A B C </s>
labelSequence = Pass (Slice (1, 0, streams.labels, axis=-1)) # e.g. D E F </s>
labelSentenceStart = Pass (BS.Sequences.First (streams.labels)) # e.g. <s>
inputSequenceDim = inputVocabDim
labelSequenceDim = labelVocabDim
isFirstLabel = BS.Loop.IsFirst (labelSequence)
#############################################################
# embeddings --as long as we cannot read multiple sequences, we got one embedding
#############################################################
# Note: Embeddings are linear. Should we use BatchNormalization?
# note: this is assumed to be applied transposed, hence the swapped dimensions. Actually--why? Still needed?
Einput = BS.Parameters.WeightParam (inputSequenceDim, inputEmbeddingDim)
Elabels = if shareEmbeddings then Einput else BS.Parameters.WeightParam (labelSequenceDim, labelEmbeddingDim)
# when vocab dim == embedding dim, embedding is the identity (no parameters applied)
EmbedInput (x) = if inputSequenceDim == inputEmbeddingDim then x else TransposeTimes (Einput, x)
EmbedLabels (x) = if labelSequenceDim == labelEmbeddingDim then x else TransposeTimes (Elabels, x)
inputEmbedded = EmbedInput (inputSequence)
labelsEmbedded = EmbedLabels (labelSequence)
labelSentenceStartEmbedded = Pass (EmbedLabels (labelSentenceStart)) # TODO: remove Pass() if not actually needed in decoder
labelSentenceStartEmbeddedScattered = BS.Sequences.Scatter (isFirstLabel, labelSentenceStartEmbedded) # unfortunately needed presently
S(x) = BS.Parameters.Stabilize (x, enabled=useStabilizer) # self-stabilizer shorthand, used throughout
#############################################################
# encoder (processes inputEmbedded)
#############################################################
# TODO: do not reverse our inputs; instead, if needed, use a backwards-running loop here
encoderFunction = if useBidirectionalEncoder then BS.RNNs.RecurrentBirectionalLSTMPStack else BS.RNNs.RecurrentLSTMPStack
encoder = encoderFunction (encoderDims, cellDims=encoderDims, S(inputEmbedded), inputDim=inputEmbeddingDim,
previousHook=BS.RNNs.PreviousHC,
enableSelfStabilization=useStabilizer)
# take the topmost layer of the encoder stack as the encoder output
encoderOutput = encoder[Length (encoderDims)-1]
# There are three ways of passing encoder state:
# 1. as initial state for decoder (Google style)
# 2. as side information for every decoder step (NYU style)
# 3. attention
# get the final encoder state for use as the initial state
# For beam decoding, we will also inject a second dimension.
thoughtVector = [
h = ReshapeDimension (BS.Sequences.Last (encoderOutput.h), 1, (dim:1))
c = ReshapeDimension (BS.Sequences.Last (encoderOutput.c), 1, (dim:1))
dim = encoderOutput.dim
]
thoughtVectorBroadcast = [ # broadcast to all time steps of the target sequence
h = BS.Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.h)
c = BS.Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.c)
dim = thoughtVector.dim
]
#############################################################
# decoder reordering hook: propagation of beam hypotheses
#############################################################
# we bake into the LSTMs to multiply h and c with the 'beamSearchReorderHook' matrix, which is
# a dummy in training but will be patched through model editing for beam decoding.
# Specifically, the decoder will replace this by a per-sample matrix that reorders hypotheses according to
# how they propagate. E.g. the 2nd best in a frame may be the history of the 3rd best in the subsequent frame
beamSearchReorderHook = Pass (BS.Constants.OnesTensor (1:1))
# helper functions to delay h and c that apply beam-search reordering, if so configured
PreviousHCWithReorderingHook (lstmState, layerIndex=0) = [
h = BS.Loop.Previous (lstmState.h * beamSearchReorderHook) // hidden state(t-1)
c = BS.Loop.Previous (lstmState.c * beamSearchReorderHook) // cell(t-1)
dim = lstmState.dim
]
# like above, but layer 0 is seeded with the thought vector at the first time step
PreviousHCFromThoughtVectorWithReorderingHook (lstmState, layerIndex=0) =
if layerIndex > 0 then PreviousHCWithReorderingHook (lstmState, layerIndex=1)
else [ # with both thought vector and beam-search hook
isFirst = BS.Loop.IsFirst (labelsEmbedded)
h = BS.Boolean.If (isFirst, thoughtVectorBroadcast.h, BS.Loop.Previous (lstmState.h * beamSearchReorderHook))
c = BS.Boolean.If (isFirst, thoughtVectorBroadcast.c, BS.Loop.Previous (lstmState.c * beamSearchReorderHook))
dim = lstmState.dim
]
#############################################################
# decoder history hook: LM history, from ground truth vs. output
#############################################################
# these are the two choices for the input to the decoder network
decoderHistoryFromGroundTruth = labelsEmbedded # for training, decoder input is ground truth...
decoderHistoryFromOutput = Pass (EmbedLabels (Hardmax (z))) # ...but for (greedy) decoding, the decoder's output is its previous input
# during training, we use ground truth. For decoding, we will rewire decoderHistoryHook = decoderHistoryFromOutput
decoderHistoryHook = Pass (decoderHistoryFromGroundTruth) # this gets redirected in decoding to feed back decoding output instead
#############################################################
# decoder
#############################################################
# first step consumes the scattered sentence-start embedding; later steps consume the (delayed) history hook
decoderInput = Pass (BS.Boolean.If (isFirstLabel, labelSentenceStartEmbeddedScattered, BS.Loop.Previous (decoderHistoryHook)))
decoderInputDim = labelEmbeddingDim
decoderDynamicAxis = labelsEmbedded
FixedWindowAttentionHook = BS.Seq2Seq.CreateAugmentWithFixedWindowAttentionHook (attentionDim, attentionSpan, decoderDynamicAxis, encoderOutput, enableSelfStabilization=useStabilizer)
# some parameters to the decoder stack depend on the mode
decoderParams =
# with attention
if useEncoder && attentionSpan > 0 then [
previousHook = PreviousHCWithReorderingHook # add reordering for beam search
augmentInputHook = FixedWindowAttentionHook # input gets augmented by the attention window
augmentInputDim = encoderOutput.dim
]
# with thought vector appended to every frame
else if useEncoder && useNYUStyle then [
previousHook = PreviousHCWithReorderingHook
augmentInputHook (input, lstmState) = S(thoughtVectorBroadcast.h) # each input frame gets augmented by the thought vector
augmentInputDim = thoughtVector.dim
]
# thought vector as initial state for decoder
else [
previousHook = PreviousHCFromThoughtVectorWithReorderingHook # Previous() function with thought vector as initial state
augmentInputHook = BS.RNNs.NoAuxInputHook
augmentInputDim = 0
]
# this is the decoder LSTM stack
decoder = BS.RNNs.RecurrentLSTMPStack (decoderDims, cellDims=decoderDims,
S(decoderInput), inputDim=decoderInputDim,
augmentInputHook=decoderParams.augmentInputHook, augmentInputDim=decoderParams.augmentInputDim,
previousHook=decoderParams.previousHook,
enableSelfStabilization=useStabilizer)
decoderOutputLayer = Length (decoder)-1
decoderOutput = decoder[decoderOutputLayer].h
decoderDim = decoderDims[decoderOutputLayer]
#############################################################
# softmax output layer
#############################################################
W = BS.Parameters.WeightParam (labelSequenceDim, decoderDim)
B = BS.Parameters.BiasParam (labelSequenceDim)
z = W * S(decoderOutput) + B; // top-level input to Softmax
#############################################################
# training criteria
#############################################################
# ce/errs below are written out explicitly (ReduceLogSum/TransposeTimes) instead of
# using CrossEntropyWithSoftmax/ErrorPrediction; the commented-out variants are alternatives.
#ce = Pass (ReduceLogSum (z) - ReduceSum (labelSequence .* z ), tag='criterion')
#errs = Pass (BS.Constants.One - ReduceSum (labelSequence .* Hardmax (z)), tag='evaluation')
#ce2 = Negate (ReduceSum (labelSequence .* LogSoftmax (z)), tag='evaluation')
#ce1 = CrossEntropyWithSoftmax (labelSequence, z, tag='evaluation') // this is the training objective
#errs = ErrorPrediction (labelSequence, z, tag='evaluation') // this also gets tracked
ce = Pass (ReduceLogSum (z) - TransposeTimes (labelSequence, z), tag='criterion')
errs = Pass (BS.Constants.One - TransposeTimes (labelSequence, Hardmax (z)), tag='evaluation')
# score output for decoding
scoreSequence = Pass (z)
#############################################################
# some helper functions
#############################################################
# these trace functions log their parameter's value
TraceState (h, what) = Transpose (Trace (Transpose (h), say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=3, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ]))
TraceDense (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=21, onlyUpToT=25, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ])
TraceDenseTransposed (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=25, format=[ type = "real" ; transpose = true ; precisionFormat = ".4" ])
TraceOneHot (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "category" ; transpose = false ])
TraceSparse (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "sparse" ; transpose = false ])
])
#######################################
# TRAINING CONFIG #
#######################################
train = [
action = "train"
traceLevel = 1
epochSize = 0 # (for quick tests, this can be overridden with something small)
# BrainScriptNetworkBuilder is defined in outer scope
SGD = [
# colon-separated values form a per-epoch schedule; the last value repeats for remaining epochs
minibatchSize = 144:144:288:576
learningRatesPerSample = 0.007*2:0.0035 # "0.007*2" = 0.007 for the first 2 epochs, then 0.0035
momentumAsTimeConstant = 1100
gradientClippingWithTruncation = true # (as opposed to clipping the Frobenius norm of the matrix)
clippingThresholdPerSample = 2.3 # visibly impacts objectives, but not final result, so keep it for safety
maxEpochs = 50
numMBsToShowResult = 100
firstMBsToShowResult = 10
gradUpdateType = "none" # FSAdaGrad?
loadBestModel = false # true # broken for some models (rereading overwrites something that got set by validation)
# tracing (enable these for debugging)
#traceNodeNamesReal = labelsEmbedded:decoderInput:"decoder[0].lstmState._privateInnards.ht":z.Plus_left.Times_right.result:z:ce
#traceNodeNamesReal = labelsEmbedded:decoderInput:z:ce
#traceNodeNamesCategory = inputSequence.out:labelSequence
dropoutRate = 0.0
# settings for Auto Adjust Learning Rate
AutoAdjust = [
autoAdjustLR = "adjustAfterEpoch"
reduceLearnRateIfImproveLessThan = 0.001
continueReduce = false
increaseLearnRateIfImproveMoreThan = 1000000000 # effectively disables LR increase
learnRateDecreaseFactor = 0.5
learnRateIncreaseFactor = 1.382
numMiniBatch4LRSearch = 100
numPrevLearnRates = 5
numBestSearchEpoch = 1
]
]
# reader definitions
# Both readers carry the settings for LMSequenceReader and CNTKTextFormatReader;
# each reader type ignores the parameters specific to the other.
reader = [
readerType = "$readerType$"
file = "$DataDir$/$trainFile$"
randomize = "auto"
# specific to CNTKTextFormatReader
skipSequenceIds = "false"
maxErrors = 100
traceLevel = 2
chunkSizeInBytes = 30000000 # large enough for entire data set
input = $cntkReaderInputDef$
# specific to LMSequenceReader
mode = "softmax" # TODO: find out what this means
nbruttsineachrecurrentiter = 0 # 0 means auto-fill given minibatch size
cacheBlockSize = 100000000 # read block size. This value is large enough to load entire corpus at once
rawInput = $lmSequenceReaderInputDef$
inputLabelsDef = $lmSequenceReaderInputLabelsDef$
outputDummy = [ labelType = "none" ]
]
# cross-validation reader: same structure as 'reader', but on the validation set, unrandomized
cvReader = [
readerType = "$readerType$"
file = "$DataDir$/$validFile$"
randomize = "none"
# specific to CNTKTextFormatReader
skipSequenceIds = "false"
maxErrors = 100
traceLevel = 2
input = $cntkReaderInputDef$
# specific to LMSequenceReader
mode = "softmax" # TODO: find out what this means
nbruttsineachrecurrentiter = 0 # 0 means auto-fill given minibatch size
cacheBlockSize = 100000000 # read block size. This value is large enough to load entire corpus at once
rawInput = $lmSequenceReaderInputDef$
inputLabelsDef = $lmSequenceReaderInputLabelsDef$
outputDummy = [ labelType = "none" ]
]
]
#######################################
# DUMP CONFIG #
#######################################
# dumps the model, specifically the learnable parameters
# writes a text dump of the model at 'dumpModelPath' (configured at the top of this file)
dump = [
action = "dumpnode"
modelPath = "$dumpModelPath$"
outputFile = "$dumpModelPath$.txt"
]
#######################################
# WRITE CONFIG #
#######################################
# This will decode the test set. The beamDepth parameter specifies the decoding mode:
# beamDepth = 0: word prediction given ground truth history (only useful for perplexity measurement)
# beamDepth = 1: greedy decoding: At each time step, choose a word greedily
# beamDepth > 1: beam decoder. Keep 'beamDepth' best hypotheses, and output their globally best at the end.
write = [
action = "write"
# select the decoder
BrainScriptNetworkBuilder = (
# beamDepth = 0 will decode with the unmodified model.
# beamDepth = 1 will modify the model to use the decoding output as the decoder's input.
# beamDepth > 1 will modify the model to track multiple hypotheses and select the globally best at the end.
if $beamDepth$ == 0 then BS.Network.Load ("$decodeModelPath$")
else if $beamDepth$ == 1 then BS.Seq2Seq.GreedySequenceDecoderFrom (BS.Network.Load ("$decodeModelPath$"))
else BS.Seq2Seq.BeamSearchSequenceDecoderFrom (BS.Network.Load ("$decodeModelPath$"), $beamDepth$)
)
outputPath = $decodeOutputPath$
#outputPath = "-" # "-" will write to stdout; useful for debugging
# declare the nodes we want to write out
# not all decoder configs have the same node names, so we just list them all
#outputNodeNames = inputsOut:labelsOut:decodeOut:network.beamDecodingModel.inputsOut:network.beamDecodingModel.labelsOut:network.beamDecodingModel.decodeOut
# output format
# We configure the output to emit a flat sequence of token strings.
format = [
type = "category"
transpose = false
labelMappingFile = "$DataDir$/$vocabFile$"
]
minibatchSize = 8192 # choose this to be big enough for the longest sentence
traceLevel = 1
epochSize = 0
# test-set reader: same structure as the training reader, but unrandomized and one sequence at a time
reader = [
readerType = "$readerType$"
file = "$DataDir$/$testFile$"
randomize = "none"
# specific to CNTKTextFormatReader
skipSequenceIds = "false"
maxErrors = 100
traceLevel = 2
input = $cntkReaderInputDef$
# specific to LMSequenceReader
mode = "softmax" # TODO: find out what this means
nbruttsineachrecurrentiter = 1 # 1 means one sequence at a time
# BUGBUG: ^^ =0 currently produces bad output. I suspect Times (data, data)
cacheBlockSize = 100000000 # read block size. This value is large enough to load entire corpus at once
rawInput = $lmSequenceReaderInputDef$
inputLabelsDef = $lmSequenceReaderInputLabelsDef$
outputDummy = [ labelType = "none" ]
]
]

Просмотреть файл

@ -0,0 +1,23 @@
This example demonstrates the use of CNTK for letter-to-sound conversion using a
sequence-to-sequence model with attention.
The code supports a number of alternative configurations. As currently configured, it implements:
* a 3-hidden layer unidirectional LSTM encoder network, all hidden dimensions are 512
* a 3-hidden layer unidirectional LSTM decoder network, all hidden dimensions are 512
* encoder state is passed to the decoder by means of attention, with projection dimension 128 and maximum input length of 20 tokens
* embedding disabled (the vocabulary is very small)
* beam decoder with beam width 3
This example uses the CMUDict as a corpus. The data or a conversion script will be included soon.
To Use:
=======
Modify the following in G2P.cntk:
* pathnames
* deviceId to specify CPU (-1) or GPU (>=0 or "auto")
Run:
* command line: cntk configFile=Examples/SequenceToSequence/Miscellaneous/G2P/G2P.cntk RunRootDir=g2p
* VS Debugger: configFile=$(SolutionDir)Examples/SequenceToSequence/Miscellaneous/G2P/G2P.cntk RunRootDir=$(SolutionDir)g2p

Просмотреть файл

@ -10,7 +10,6 @@ write=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
printValues=true

Просмотреть файл

@ -12,7 +12,6 @@ TrainDNN=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
NDLNetworkBuilder=[
ndlMacros=$NdlDir$/default_macros.ndl
@ -98,7 +97,6 @@ TrainLSTM=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
NDLNetworkBuilder=[
ndlMacros=$NdlDir$/default_macros.ndl
networkDescription=$ndlfile$
@ -183,7 +181,6 @@ TrainPACRNN=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
NDLNetworkBuilder=[
# ndlMacros=$NdlDir$/default_macros.ndl
@ -286,7 +283,6 @@ write=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
printValues=true

Просмотреть файл

@ -12,7 +12,6 @@ TrainDNN=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
NDLNetworkBuilder=[
ndlMacros=$NdlDir$/default_macros.ndl

Просмотреть файл

@ -10,7 +10,6 @@ TrainModel=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
NDLNetworkBuilder=[
ndlMacros=$NdlDir$/default_macros.ndl
networkDescription=$NdlDir$/model.ndl

Просмотреть файл

@ -11,7 +11,6 @@ TrainModel=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
NDLNetworkBuilder=[
ndlMacros=$NdlDir$/default_macros.ndl
networkDescription=$NdlDir$/model.ndl

Просмотреть файл

@ -20,7 +20,6 @@ TrainModel=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
NDLNetworkBuilder=[
ndlMacros=$NdlDir$/default_macros.ndl

Просмотреть файл

@ -19,7 +19,6 @@ TrainModel=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
NDLNetworkBuilder=[
ndlMacros=$NdlDir$/default_macros.ndl

Просмотреть файл

@ -12,7 +12,6 @@ write=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
printValues=true

Просмотреть файл

@ -12,7 +12,6 @@ write=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
printValues=true

Просмотреть файл

@ -20,7 +20,6 @@ speechTrainNDL=[
deviceId=-1
traceLevel=1
useValidation=true
NDLNetworkBuilder=[
networkDescription=$ConfigFolder$\LSTM_1layer.ndl

Просмотреть файл

@ -20,7 +20,6 @@ speechTrainNDL=[
deviceId=0
traceLevel=1
useValidation=true
NDLNetworkBuilder=[
networkDescription=$ConfigFolder$\LSTM_1layer.ndl

Просмотреть файл

@ -12,7 +12,6 @@ TIMIT_TrainLSTM=[
# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=$DeviceNumber$
traceLevel=1
useValidation=true
truncated=true
@ -87,4 +86,4 @@ TIMIT_TrainLSTM=[
labelMappingFile=$MlfDir$/TIMIT.statelist
]
]
]
]

Просмотреть файл

@ -40,7 +40,6 @@ deviceId=-1
epochSize=4430000
# which is 886 * 5000
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLM
# rnnType=LSTM
@ -307,7 +306,6 @@ test=[
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLM
modelPath=$ExpFolder$\modelRnnCNTK
@ -410,4 +408,4 @@ test=[
]
]
]
]
]

Просмотреть файл

@ -41,7 +41,6 @@ train=[
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLM
# rnnType=LSTM
@ -308,7 +307,6 @@ test=[
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLM
modelPath=$ExpFolder$\modelRnnCNTK
@ -411,4 +409,4 @@ test=[
]
]
]
]
]

Просмотреть файл

@ -30,7 +30,6 @@ epochSize=4430000
# which is 886 * 5000
# recurrentLayer=1
defaultHiddenActivity=0.0
useValidation=true
rnnType=CLASSLM
# rnnType=LSTM
@ -297,7 +296,6 @@ test=[
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLM
modelPath=$ExpFolder$\modelRnnCNTK
@ -400,4 +398,4 @@ test=[
]
]
]
]
]

Просмотреть файл

@ -31,7 +31,6 @@ epochSize=4430000
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.0
useValidation=true
rnnType=CLASSLM
# rnnType=LSTM
@ -298,7 +297,6 @@ test=[
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLM
modelPath=$ExpFolder$\modelRnnCNTK
@ -401,4 +399,4 @@ test=[
]
]
]
]
]

Просмотреть файл

@ -31,7 +31,6 @@ train=[
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.0
useValidation=true
rnnType=CLASSLM
# rnnType=LSTM
@ -297,7 +296,6 @@ test=[
# which is 886 * 5000
recurrentLayer=1
defaultHiddenActivity=0.1
useValidation=true
rnnType=CLASSLM
modelPath=$ExpFolder$\modelRnnCNTK
@ -400,4 +398,4 @@ test=[
]
]
]
]
]

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,48 +0,0 @@
# TODO: must sort this out. For now, this is just shared stuff between training and decoding.
# these depend on beamDepth parameter for now, fix this
TraceState (h, what) =
if enableTracing
then Transpose (Trace (Transpose (h), say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, onlyUpToRow=beamDepth*beamDepth, onlyUpToT=3, format=formatDense))
else h
TraceDense (h, what) =
if enableTracing
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, onlyUpToRow=21/*beamDepth*beamDepth*/, onlyUpToT=25, format=formatDense)
else h
TraceDenseTransposed (h, what) =
if enableTracing
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, onlyUpToRow=beamDepth*beamDepth, onlyUpToT=25, format=formatDenseTransposed)
else h
TraceOneHot (h, what) =
if enableTracing
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, /*onlyUpToRow=beamDepth*beamDepth, onlyUpToT=15,*/ format=formatOneHot)
else h
TraceSparse (h, what) =
if enableTracing
then Trace (h, say=what, logFirst=10, logFrequency=traceFrequency, logGradientToo=false, /*onlyUpToRow=beamDepth*beamDepth, onlyUpToT=3,*/ format=formatSparse)
else h
Trace (node, say='', logFrequency=traceFrequency, logFirst=10, logGradientToo=false, onlyUpToRow=100000000, onlyUpToT=100000000, format=[], tag='') = new ComputationNode [
operation = 'Trace' ; inputs = node
]
formatDense = [
type = "real"
transpose = false
precisionFormat = ".4"
]
formatDenseTransposed = [
type = "real"
transpose = true
precisionFormat = ".4"
]
formatOneHot = [
type = "category"
transpose = false
labelMappingFile = tracingLabelMappingFile
]
formatSparse = [
type = "sparse"
transpose = false
labelMappingFile = tracingLabelMappingFile
]

Просмотреть файл

@ -171,6 +171,17 @@ ifndef CNTK_CUDA_GENCODE
endif
endif
# Should we relocate *.gcno and *.gcda files using -fprofile-dir option?
# Use GCOV_PREFIX and GCOV_PREFIX_STRIP if relocating:
# For example, if the object file /user/build/foo.o was built with -fprofile-arcs, the final executable will try to create the data file
# /user/build/foo.gcda when running on the target system. This will fail if the corresponding directory does not exist and it is unable
# to create it. This can be overcome by, for example, setting the environment as GCOV_PREFIX=/target/run and GCOV_PREFIX_STRIP=1.
# Such a setting will name the data file /target/run/build/foo.gcda
ifdef CNTK_CODE_COVERAGE
CXXFLAGS += -fprofile-arcs -ftest-coverage
LDFLAGS += -lgcov --coverage
endif
ifeq ("$(BUILDTYPE)","debug")
ifdef CNTK_CUDA_CODEGEN_DEBUG
GENCODE_FLAGS := $(CNTK_CUDA_CODEGEN_DEBUG)
@ -243,6 +254,7 @@ READER_SRC =\
$(SOURCEDIR)/Readers/ReaderLib/TruncatedBpttPacker.cpp \
$(SOURCEDIR)/Readers/ReaderLib/PackerBase.cpp \
$(SOURCEDIR)/Readers/ReaderLib/FramePacker.cpp \
$(SOURCEDIR)/Readers/ReaderLib/ChunkCache.cpp \
COMMON_SRC =\
$(SOURCEDIR)/Common/Config.cpp \
@ -257,10 +269,12 @@ COMMON_SRC =\
MATH_SRC =\
$(SOURCEDIR)/Math/CPUMatrix.cpp \
$(SOURCEDIR)/Math/CPUSparseMatrix.cpp \
$(SOURCEDIR)/Math/CPURNGHandle.cpp \
$(SOURCEDIR)/Math/MatrixQuantizerImpl.cpp \
$(SOURCEDIR)/Math/MatrixQuantizerCPU.cpp \
$(SOURCEDIR)/Math/QuantizedMatrix.cpp \
$(SOURCEDIR)/Math/Matrix.cpp \
$(SOURCEDIR)/Math/RNGHandle.cpp \
$(SOURCEDIR)/Math/TensorView.cpp \
$(SOURCEDIR)/Math/CUDAPageLockedMemAllocator.cpp \
$(SOURCEDIR)/Math/ConvolutionEngine.cpp \
@ -272,6 +286,7 @@ MATH_SRC +=\
$(SOURCEDIR)/Math/GPUTensor.cu \
$(SOURCEDIR)/Math/GPUSparseMatrix.cu \
$(SOURCEDIR)/Math/GPUWatcher.cu \
$(SOURCEDIR)/Math/GPURNGHandle.cu \
$(SOURCEDIR)/Math/MatrixQuantizerGPU.cu \
$(SOURCEDIR)/Math/CuDnnCommon.cu \
$(SOURCEDIR)/Math/CuDnnConvolutionEngine.cu \
@ -341,6 +356,24 @@ $(LIBDIR)/HTKMLFReader.so: $(HTKMLFREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)
########################################
# CompositeDataReader plugin
########################################
COMPOSITEDATAREADER_SRC =\
$(SOURCEDIR)/Readers/CompositeDataReader/CompositeDataReader.cpp \
$(SOURCEDIR)/Readers/CompositeDataReader/Exports.cpp \
COMPOSITEDATAREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(COMPOSITEDATAREADER_SRC))
COMPOSITEDATAREADER:=$(LIBDIR)/CompositeDataReader.so
ALL+=$(COMPOSITEDATAREADER)
SRC+=$(COMPOSITEDATAREADER_SRC)
$(LIBDIR)/CompositeDataReader.so: $(COMPOSITEDATAREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)
########################################
# ExperimentalHTKMLFReader plugin
########################################
@ -554,9 +587,10 @@ ifeq (,$(wildcard Source/1BitSGD/*.h))
$(error Build with 1bit-SGD was requested but cannot find the code. Please check https://github.com/Microsoft/CNTK/wiki/Enabling-1bit-SGD for instructions)
endif
INCLUDEPATH += $(SOURCEDIR)/1BitSGD
INCLUDEPATH += $(SOURCEDIR)/1BitSGD
COMMON_FLAGS += -DQUANTIZED_GRADIENT_AGGREGATION
COMMON_FLAGS += -DCNTK_PARALLEL_TRAINING_SUPPORT
# temporarily adding to 1bit, need to work with others to fix it
endif
########################################

Просмотреть файл

@ -1,16 +1,19 @@
# CNTK
## Latest news
*2016-05-16.* An example illustrating [Using CNTK with ResNet](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Miscellaneous/ImageNet/ResNet) is added to the codebase. The example contains some pre-trained models that can be used in various applications.
*2016-05-16.* CNTK Wiki now has [FAQ Page](https://github.com/Microsoft/CNTK/wiki/CNTK-FAQ)
*2016-05-05.* CNTK now supports *BlockMomentum* Stochastic Gradient Descent (SGD) algorithm.
See the details in the [Multiple GPUs and machines Wiki section](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines)
*2016-05-03.* New transformations are implemented for **Image Reader**.
See the description in the [Image Reader Wiki section](https://github.com/Microsoft/CNTK/wiki/Image-reader)
*2016-04-25.* V 1.1 Binary release
CNTK v.1.1 binaries are on the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases/tag/v1.1)
*2016-04-12.* CNTK is available as [Azure Virtual Machines](https://github.com/Microsoft/CNTK/wiki/CNTK-on-Azure) and [Docker Containers](https://github.com/Microsoft/CNTK/wiki/CNTK-Docker-Containers)
*2016-04-12.* Added support for ND convolution and ND pooling and CPU support for `cudnn` layout in convolution, pooling and batch normalization nodes.
Read [documentation](https://github.com/Microsoft/CNTK/wiki/Full-NDL-Function-Reference) on convolution, pooling and batch normalization nodes.
*2016-04-05.* CUDA7.5 support for Windows Build: Windows project files have been updated to automatically utilize CUDA 7.5 if present
See [all news](https://github.com/Microsoft/CNTK/wiki/News).
## What is CNTK

@ -1 +1 @@
Subproject commit f57be8b8caeddf385a44a14acc587f4e5168152d
Subproject commit 18fcb1a9378432ae179948b0f1e281115a2c7d86

Просмотреть файл

@ -61,6 +61,8 @@ static void DoEvalBase(const ConfigParameters& config, IDataReader& reader)
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
bool enableDistributedMBReading = config(L"distributedMBReading", false);
ConfigArray evalNodeNames = config(L"evalNodeNames", "");
vector<wstring> evalNodeNamesVector;
for (int i = 0; i < evalNodeNames.size(); ++i)
@ -75,7 +77,7 @@ static void DoEvalBase(const ConfigParameters& config, IDataReader& reader)
config(L"traceNodeNamesCategory", ConfigParameters::Array(stringargvector())),
config(L"traceNodeNamesSparse", ConfigParameters::Array(stringargvector())));
SimpleEvaluator<ElemType> eval(net, MPIWrapper::GetInstance(), numMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches);
SimpleEvaluator<ElemType> eval(net, MPIWrapper::GetInstance(), enableDistributedMBReading, numMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches);
eval.Evaluate(&reader, evalNodeNamesVector, mbSize[0], epochSize);
}
@ -125,6 +127,8 @@ void DoCrossValidate(const ConfigParameters& config)
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
bool enableDistributedMBReading = config(L"distributedMBReading", false);
ConfigArray evalNodeNames = config(L"evalNodeNames", "");
vector<wstring> evalNodeNamesVector;
for (int i = 0; i < evalNodeNames.size(); ++i)
@ -157,7 +161,7 @@ void DoCrossValidate(const ConfigParameters& config)
cvModels.push_back(cvModelPath);
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, cvModelPath);
SimpleEvaluator<ElemType> eval(net, MPIWrapper::GetInstance(), numMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches);
SimpleEvaluator<ElemType> eval(net, MPIWrapper::GetInstance(), enableDistributedMBReading, numMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches);
fprintf(stderr, "Model %ls --> \n", cvModelPath.c_str());
auto evalErrors = eval.Evaluate(&cvDataReader, evalNodeNamesVector, mbSize[0], epochSize);

Просмотреть файл

@ -89,7 +89,7 @@ bool TryGetNetworkFactory(const ConfigRecordType& config, function<ComputationNe
L"precision = '%ls'\n" // 'float' or 'double'
L"network = %ls", // source code of expression that evaluates to a ComputationNetwork
(int)deviceId, ElemTypeName<ElemType>(), sourceOfNetwork.c_str());
let expr = BS::ParseConfigDictFromString(sourceOfBS, move(includePaths));
let expr = BS::ParseConfigDictFromString(sourceOfBS, L"BrainScriptNetworkBuilder", move(includePaths));
// the rest is done in a lambda that is only evaluated when a virgin network is needed
// Note that evaluating the BrainScript *is* instantiating the network, so the evaluate call must be inside the lambda.

Просмотреть файл

@ -874,27 +874,48 @@ public:
{
let &config = *configp;
double &us = *this; // we write to this
let arg = config[L"arg"];
let whatArg = config[L"what"];
wstring what = whatArg;
if (what == L"Floor")
us = floor((double) arg);
else if (what == L"Length")
if (what == L"Floor" || what == L"Length") // one-arg functions
{
if (arg.Is<String>())
us = (double) ((wstring &) arg).size();
else // otherwise expect an array
let arg = config[L"arg"];
if (what == L"Floor")
{
let & arr = arg.AsRef<ConfigArray>();
let range = arr.GetIndexRange();
us = (double) (range.second + 1 - range.first);
us = floor((double)arg);
}
else if (what == L"Length")
{
if (arg.Is<String>())
us = (double)((wstring &)arg).size();
else // otherwise expect an array
{
let & arr = arg.AsRef<ConfigArray>();
let range = arr.GetIndexRange();
us = (double)(range.second + 1 - range.first);
}
}
}
else if (what == L"Mod" || what == L"IntDiv") //two-arg int functions
{
let argsArg = config[L"args"];
let& args = argsArg.AsRef<ConfigArray>();
auto range = args.GetIndexRange();
if (range.second != range.first + 1)
argsArg.Fail(L"Mod/IntDiv expects two arguments");
let arg1 = (int)args.At(range.first);
let arg2 = (int)args.At(range.second);
if (what == L"Mod")
us = (int)(arg1 % arg2);
else if (what == L"IntDiv")
us = (int)(arg1 / arg2);
}
else
whatArg.Fail(L"Unknown 'what' value to NumericFunction: " + what);
}
};
// CompareFunctions
// - IsSameObject()
class CompareFunction : public BoxOf<Bool>

Просмотреть файл

@ -122,11 +122,10 @@ struct Issue
issues.back().AddMarkup(symbol, location.charPos);
}
// print it backwards
if (!locations.empty()) // (be resilient to some throwers not having a TextrLocation; to be avoided)
if (!locations.empty()) // (be resilient to some throwers not having a TextLocation; to be avoided)
{
let& firstLoc = issues.front().location;
fprintf(stderr, "\n%ls while %ls line %d char %d of %ls\n", errorKind, kind, (int) firstLoc.lineNo + 1 /*report 1-based*/, (int) firstLoc.charPos + 1, firstLoc.GetSourceFile().path.c_str());
fprintf(stderr, "see location marked ^ and parent contexts marked 0..9, a..z, A..Z:\n\n");
fprintf(stderr, "[CALL STACK]\n");
for (auto i = issues.rbegin(); i != issues.rend(); i++)
{
let& issue = *i;
@ -135,9 +134,11 @@ struct Issue
const auto line = (where.lineNo == lines.size()) ? L"(end)" : lines[where.lineNo].c_str();
fprintf(stderr, " %ls\n %ls\n", line, issue.markup.c_str());
}
fprintf(stderr, "%ls while %ls: %ls(%d)", errorKind, kind, firstLoc.GetSourceFile().path.c_str(), (int)firstLoc.lineNo + 1 /*report 1-based*/);
}
fprintf(stderr, "%ls: %ls\n", errorKind, what);
fflush(stderr);
else
fprintf(stderr, "%ls while %ls", errorKind, kind);
fprintf(stderr, ": %ls\n", what), fflush(stderr);
}
/*static*/ vector<SourceFile> TextLocation::sourceFileMap;
@ -941,7 +942,7 @@ public:
static void Test()
{
let parserTest = L"a=1\na1_=13;b=2 // cmt\ndo = (print\n:train:eval) ; x = array[1..13] (i=>1+i*print.message==13*42) ; print = new PrintAction [ message = 'Hello World' ]";
ParseConfigDictFromString(parserTest, vector<wstring>())->Dump();
ParseConfigDictFromString(parserTest, L"Test", vector<wstring>())->Dump();
}
};
@ -950,9 +951,9 @@ static ExpressionPtr Parse(SourceFile&& sourceFile, vector<wstring>&& includePat
{
return Parser(move(sourceFile), move(includePaths)).ParseRecordMembersToDict();
}
ExpressionPtr ParseConfigDictFromString(wstring text, vector<wstring>&& includePaths)
ExpressionPtr ParseConfigDictFromString(wstring text, wstring location, vector<wstring>&& includePaths)
{
return Parse(SourceFile(L"(command line)", text), move(includePaths));
return Parse(SourceFile(location, text), move(includePaths));
}
//ExpressionPtr ParseConfigDictFromFile(wstring path, vector<wstring> includePaths)
//{

Просмотреть файл

@ -78,9 +78,9 @@ public:
virtual const wchar_t* kind() const = 0; // e.g. "warning" or "error"
// pretty-print this as an error message
void /*ScriptingException::*/ PrintError() const
void /*ScriptingException::*/ PrintError(const std::wstring& linePrefix) const
{
TextLocation::PrintIssue(locations, L"error", kind(), msra::strfun::utf16(what()).c_str());
TextLocation::PrintIssue(locations, linePrefix.c_str(), kind(), msra::strfun::utf16(what()).c_str());
}
void AddLocation(TextLocation where)
{
@ -134,7 +134,7 @@ struct Expression
typedef Expression::ExpressionPtr ExpressionPtr; // circumvent some circular definition problem
// access the parser through one of these functions
ExpressionPtr ParseConfigDictFromString(wstring text, vector<wstring>&& includePaths); // parses a list of dictionary members, returns a dictionary expression
ExpressionPtr ParseConfigDictFromString(wstring text, wstring location, vector<wstring>&& includePaths); // parses a list of dictionary members, returns a dictionary expression
// TODO: These rvalue references are no longer adding value, change to const<>&
//ExpressionPtr ParseConfigDictFromFile(wstring path, vector<wstring> includePaths); // likewise, but from a file path
ExpressionPtr ParseConfigExpression(const wstring& sourceText, vector<wstring>&& includePaths); // parses a single expression from sourceText, which is meant to contain an include statement, hence includePaths

Просмотреть файл

@ -178,7 +178,7 @@ void SomeTests()
{
fprintf(stderr, "\n### Test %d ###\n\n", (int) i), fflush(stderr);
let parserTest = parserTests[i];
let expr = ParseConfigDictFromString(standardFunctions + computationNodes + commonMacros + parserTest, vector<wstring>());
let expr = ParseConfigDictFromString(standardFunctions + computationNodes + commonMacros + parserTest, L"Test", vector<wstring>());
//expr->Dump();
Do(expr);
if (oneOnly)
@ -187,7 +187,8 @@ void SomeTests()
}
catch (const ConfigException& err)
{
err.PrintError();
err.PrintError(L"error");
}
}
} } } // namespaces
}}} // namespaces

Просмотреть файл

@ -21,6 +21,9 @@ Min(a,b) = if a < b then a else b
Max(a,b) = if a > b then a else b
Fac(n) = if n > 1 then Fac(n-1) * n else 1
IsSameObject(a,b) = new CompareFunction [ what = 'IsSameObject' ; args = (a : b) ]
Mod(x, y) = new NumericFunction [ what = 'Mod' ; args = (x:y) ]
IntDiv(x, y) = new NumericFunction [ what = 'IntDiv' ; args = (x:y) ]
##############################################################################
# ComputationNodes
@ -182,6 +185,8 @@ CosDistanceWithNegativeSamples(aVectorSequence, anotherVectorSequence, numShifts
Cosine(x, tag='') = new ComputationNode [ operation = 'Cosine' ; inputs = x /*plus the function args*/ ]
CrossEntropy(refProbVectorSequence, outProbVectorSequence, tag='') = new ComputationNode [ operation = 'CrossEntropy' ; inputs = (refProbVectorSequence : outProbVectorSequence) /*plus the function args*/ ]
CrossEntropyWithSoftmax(labelVectorSequence, outProbVectorSequence, tag='') = new ComputationNode [ operation = 'CrossEntropyWithSoftmax' ; inputs = (labelVectorSequence : outProbVectorSequence) /*plus the function args*/ ]
# once ReduceLogSum becomes proper C++, CrossEntropyWithSoftmax() will become this:
NewCrossEntropyWithSoftmax (labelSequence, z, tag='') = [ tag1 = tag; out = Minus (ReduceLogSum (z), ReduceSum (labelSequence .* z), tag=tag1) ].out
DiagTimes(diagonalMatrixAsColumnVector, matrix, tag='') = new ComputationNode [ operation = 'DiagTimes' ; inputs = (diagonalMatrixAsColumnVector : matrix) /*plus the function args*/ ]
// TODO: DiagTimes = ElementTimes
Dropout(activationVectorSequence, tag='') = new ComputationNode [ operation = 'Dropout' ; inputs = activationVectorSequence /*plus the function args*/ ]
@ -197,6 +202,7 @@ KhatriRaoProduct(leftMatrix, rightMatrix, tag='') = new ComputationNode [ operat
Log(x, tag='') = new ComputationNode [ operation = 'Log' ; inputs = x /*plus the function args*/ ]
LogPlus(leftMatrix, rightMatrix, tag='') = new ComputationNode [ operation = 'LogPlus' ; inputs = (leftMatrix : rightMatrix) /*plus the function args*/ ]
LogSoftmax(z, tag='') = new ComputationNode [ operation = 'LogSoftmax' ; inputs = z /*plus the function args*/ ]
# TODO: ^^ along axis, like Softmax
MatrixL1Reg(matrix, tag='') = new ComputationNode [ operation = 'MatrixL1Reg' ; inputs = matrix /*plus the function args*/ ]
MatrixL2Reg(matrix, tag='') = new ComputationNode [ operation = 'MatrixL2Reg' ; inputs = matrix /*plus the function args*/ ]
Mean(dataVectorSequence, tag='') = new ComputationNode [ operation = 'Mean' ; inputs = dataVectorSequence /*plus the function args*/ ]
@ -209,27 +215,42 @@ PerDimMeanVarNormalization(dataVectorSequence, meanVector, invStdDevVector, tag=
Plus(leftMatrix, rightMatrix, tag='') = new ComputationNode [ operation = 'Plus' ; inputs = (leftMatrix : rightMatrix) /*plus the function args*/ ]
Reciprocal(z, tag='') = new ComputationNode [ operation = 'Reciprocal' ; inputs = z /*plus the function args*/ ]
RectifiedLinear(z, tag='') = new ComputationNode [ operation = 'RectifiedLinear' ; inputs = z /*plus the function args*/ ]
ReducePlus (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "Plus" /*plus the function args*/ ]
#ReduceLogPlus (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "LogPlus" /*plus the function args*/ ]
ReduceSum (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "Sum" /*plus the function args*/ ]
# the following is a temporary workaround until we have the C++ version
ReduceLogSum (z, axis=0, tag='') = if axis != 0 then Fail("ReduceLogSum for now only supports axis=0.")
else [ tag1=tag ; axis1=axis ; out = RowSlice (0, 1, z - LogSoftmax (z), tag=tag1) ].out
#ReduceLogSum (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "LogSum" /*plus the function args*/ ]
#ReduceMean (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "Mean" /*plus the function args*/ ]
#ReduceMax (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "Max" /*plus the function args*/ ]
#ReduceMin (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "Min" /*plus the function args*/ ]
RNN(A, B, hiddenSize=10, numLayers=1, bidirectional=false, rnnMode='LSTM', tag='') = new ComputationNode [ operation = 'RNN' ; inputs = ( A : B ) /*plus the function args*/ ]
Round(x, tag='') = Floor(Plus(x, ConstantTensor(0.5, (1))), tag=tag)
Scale(scalarScalingFactor, matrix, tag='') = new ComputationNode [ operation = 'Scale' ; inputs = (scalarScalingFactor : matrix) /*plus the function args*/ ]
// TODO: Scale = ElementTimes
# TODO: Scale = ElementTimes
ScatterPacked(cond, indexSequence, sourceData, tag='') = new ComputationNode [ operation = 'ScatterPacked' ; inputs = (cond : indexSequence : sourceData) /*plus the function args*/ ]
Sigmoid(z, tag='') = new ComputationNode [ operation = 'Sigmoid' ; inputs = z /*plus the function args*/ ]
Sin(z, tag='') = new ComputationNode [ operation = 'Sin' ; inputs = z /*plus the function args*/ ]
Softmax(z, tag='') = new ComputationNode [ operation = 'Softmax' ; inputs = z /*plus the function args*/ ]
Softmax (z, axis=0, tag='') = # TODO: replace this with more efficient version below once we have ReduceLogSum
if axis == 0 then new ComputationNode [ operation = 'Softmax' ; inputs = z /*plus the function args*/ ]
else
[
numerator = Softmax (z) # do a first Softmax to bring it into harmless numeric range
denominator = ReduceSum (axis=axis1, numerator) ; axis1 = axis # reduce along axis
P = numerator .* Reciprocal (denominator) # normalize numerator by the sum along the given axis
# TODO: This is not efficient. Once we have ReduceLogSum, it will be this:
#Z = ReduceLogSum (axis=axis0, z) # reduce along axis
#P = Exp (z - Z)
].P
Hardmax(z, tag='') = new ComputationNode [ operation = 'Hardmax' ; inputs = z /*plus the function args*/ ]
Sqrt(z, tag='') = new ComputationNode [ operation = 'Sqrt' ; inputs = z /*plus the function args*/ ]
SquareError(aMatrix, anotherMatrix, tag='') = new ComputationNode [ operation = 'SquareError' ; inputs = (aMatrix : anotherMatrix) /*plus the function args*/ ]
SumColumnElements(z, tag='') = new ComputationNode [ operation = 'SumColumnElements' ; inputs = z /*plus the function args*/ ] // deprecated
SumColumnElements(z, tag='') = new ComputationNode [ operation = 'SumColumnElements' ; inputs = z /*plus the function args*/ ] # deprecated
SumElements(matrix, tag='') = new ComputationNode [ operation = 'SumElements' ; inputs = matrix /*plus the function args*/ ]
# ^^ TODO: Rename to ReduceSumMB?
Tanh(z, tag='') = new ComputationNode [ operation = 'Tanh' ; inputs = z /*plus the function args*/ ]
TimeReverse(vectorSequence, tag='') = new ComputationNode [ operation = 'TimeReverse' ; inputs = vectorSequence /*plus the function args*/ ]
Trace (node, say='', logFrequency=traceFrequency, logFirst=10, logGradientToo=false, onlyUpToRow=100000000, onlyUpToT=100000000, format=[], tag='') = new ComputationNode [ operation = 'Trace' ; inputs = node ]
TransposeTimes(leftMatrix, rightMatrix, tag='') = new ComputationNode [ operation = 'TransposeTimes' ; inputs = (leftMatrix : rightMatrix) /*plus the function args*/ ]
Where(cond, tag='') = new ComputationNode [ operation = 'Where' ; inputs = cond /*plus the function args*/ ]
@ -275,6 +296,8 @@ Constants = [
# is this like Sequences.Repeat?
True = 1
False = 0
None = ConstantTensor (42, (1))
IsNone (x) = IsSameObject (x, None)
]
##############################################################################
@ -301,6 +324,7 @@ Boolean = [
# select a value
# Note: This will be replaced by BrainScript 'if cond then thenVal else elseVal' and SwitchNode
If (cond, thenVal, elseVal, tag='') = new ComputationNode [ operation = 'If' ; inputs = (cond : thenVal : elseVal) /*plus the function args*/ ]
#If (cond, thenVal, elseVal) = cond .* thenVal + Not (cond) .* elseVal
]
##############################################################################
@ -329,13 +353,25 @@ Sequences = [
# returns a record [ value=..., valid=... ], both being 1-step sequences of [dim x N]. N can optionally be moved to axes >2.
# This implementation is suboptimal in that it creates copies for the intermediate steps.
PastValueWindow (N, in, axis=2) = [
isLast = Loop.IsLast (in)
isLastIndex = PackedIndex (in, Where (isLast))
GatherLast (x) = GatherPacked (isLastIndex, x) # 'cond' matches 'x'
onesLikeIn = Constants.OnesLike (in)
delayLine[t:0..N-1] = [ # shift register for encoder, last N inputs
value = if t == 0
then in # delay 0: current value
else Loop.PastValue (0, in, timeStep=t)
then in # delay 0: current value
else PastValue (0, in, timeStep=t, defaultHiddenActivation=0)
valid = if t == 0
then Constants.One
else Constants.One - PastValue (1, Constants.ZeroesLike (in), timeStep=t, defaultHiddenActivation=1)
then onesLikeIn # BUGBUG: if I say Constant.Ones here, it outputs 0. Ones has no MBLayout
else PastValue (1, onesLikeIn, timeStep=t, defaultHiddenActivation=0)
TraceDenseTransposed (h, what) = h
# Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=25, format=[ type = "real" ; transpose = true ; precisionFormat = ".4" ])
lastValue = TraceDenseTransposed( GatherLast (value) ,'dvalue') # [i, delay]
lastValid = TraceDenseTransposed( GatherLast (valid) ,'dvalid') # [i, delay]
]
# delayLine[t].value = value of t steps in the past
# delayLine[t].valid = true if we had a value t steps in the past
@ -343,8 +379,8 @@ Sequences = [
if axis == 2 then SplitDimension (x, 1, N)
else if axis > 2 then TransposeDimensions (SplitDimension (x, 1, N), 2, axis)
else Fail ("PastValueWindow: axis>2 required.") # BUGBUG: We also require that input is a single vector. Address later.
value = Slice (-1, 0, axis=-1, SplitStack (RowStack (array[0..N-1](t=>delayLine[t].value)))) # [i, delay]
valid = Slice (-1, 0, axis=-1, SplitStack (RowStack (array[0..N-1](t=>delayLine[t].valid)))) # [i, delay]
value = SplitStack (RowStack (array[0..N-1](t=>delayLine[t].lastValue))) # [i, delay]
valid = SplitStack (RowStack (array[0..N-1](t=>delayLine[t].lastValid))) # [i, delay]
]
# fold left/right: Reduce entire sequence by applying binaryOp, e.g. FoldL (Plus, 0, input)
@ -369,47 +405,48 @@ Sequences = [
# sequence-altering LINQ-like operators
# These generate new data packing (MBLayouts)
# TakeWhile and DropWhile
TakeWhile (predicate, x) = Filter ( _WhilePredicate (PastValue, predicate), x)
SkipWhile (predicate, x) = Filter (!_WhilePredicate (PastValue, predicate), x)
_WhilePredicate (DelayFn, predicate, input) =
[
whilePredicateRec = Boolean.And (DelayFn (whilePredicateRec, defaultHiddenActivation=Boolean.True), predicate)
].whilePredicateRec
# TODO: do we need operations from the back?
# First and Take
# LINQ allows predicates as well.
First (x) = Take (1, x)
Take (N, x) = _Take (PastValue, N, x)
_Take (DelayFn, N, x) = [
selected = Loop._IsWithin (DelayFn, N, x)
out = Gather (selected, x)
].out
# Last and TakeRight
Last (x) = TakeRight (1, x)
TakeRight (N, x) = _Take (FutureValue, N, x)
Skip (N, x) = if N > 0 then _Skip (PastValue, N, x) else x
_Skip (DelayFn, N, x) = [ // TODO: merge with _Take
selected = Loop._IsWithin (DelayFn, N, x)
out = Gather (Boolean.Not (selected), x)
].out
ElementAt (n, x) = [ // not efficient, as it filters twice. Better AND the predicates. TODO: what if n is out of range? ElementAtOrDefault
startMask = Skip (n, x) // ...000111...
mask = startMask - PastValue (0, startMask) // ...000100...
out = Gather (mask, x)
]
Single (predicate, x) = x
First (x) = Slice (0, 1, x, axis=-1)
Last (x) = Slice (-1, 0, x, axis=-1)
# TakeWhile and DropWhile
#TakeWhile (predicate, x) = Filter ( _WhilePredicate (PastValue, predicate), x)
#SkipWhile (predicate, x) = Filter (!_WhilePredicate (PastValue, predicate), x)
#_WhilePredicate (DelayFn, predicate, input) =
#[
# whilePredicateRec = Boolean.And (DelayFn (whilePredicateRec, defaultHiddenActivation=Boolean.True), predicate)
#].whilePredicateRec
# TODO: do we need operations from the back?
#Take (N, x) = _Take (PastValue, N, x)
#TakeRight (N, x) = _Take (FutureValue, N, x)
#_Take (DelayFn, N, x) = [
# selected = Loop._IsWithin (DelayFn, N, x)
# out = Gather (selected, x)
#].out
#
#Skip (N, x) = if N > 0 then _Skip (PastValue, N, x) else x
#_Skip (DelayFn, N, x) = [ // TODO: merge with _Take
# selected = Loop._IsWithin (DelayFn, N, x)
# out = Gather (Boolean.Not (selected), x)
#].out
#ElementAt (n, x) = [ // not efficient, as it filters twice. Better AND the predicates. TODO: what if n is out of range? ElementAtOrDefault
# startMask = Skip (n, x) // ...000111...
# mask = startMask - PastValue (0, startMask) // ...000100...
# out = Gather (mask, x)
#]
#Single (predicate, x) = x
#FirstOrDefault (x) = ? // can empty sequences exist or even be represented by CNTK?
Average (x) = Sum (x) / Loop.Count(x) // TODO: patch opQuotient to check 0/0 = 0
Sum (x) = FoldL (Plus, 0, x)
LogSum (x) = FoldL (LogPlus, 0, x)
#Average (x) = Sum (x) / Loop.Count(x) // TODO: patch opQuotient to check 0/0 = 0
#Sum (x) = FoldL (Plus, 0, x)
#LogSum (x) = FoldL (LogPlus, 0, x)
#Max (x) = FoldL (^.Max, ?, x) // TODO: name clash; need to implement ^.
#Min (x) = FoldL (^.Min, ?, x) // TODO: what's the init value?
All (x) = FoldL (Boolean.And, OnesLike (x), x)
Any (x) = FoldL (Boolean.Or, ZeroesLike (x), x)
#All (x) = FoldL (Boolean.And, OnesLike (x), x)
#Any (x) = FoldL (Boolean.Or, ZeroesLike (x), x)
# Join to create 2D fields for s2s attention?
@ -478,10 +515,24 @@ Parameters =
StabilizeElements (x, inputDim=x.dim, enabled=true) =
if enabled
then [
beta = Exp (BiasParam ((inputDim)))
result = beta .* x
#beta = Exp (BiasParam ((inputDim))) # init value is 0
#beta = ParameterTensor ((inputDim), init='fixedValue', value=1.0) # init value is 1
# or SoftPlus: ln(1+e^beta)
#beta = Log (Constants.One + Exp (ParameterTensor ((inputDim), init='fixedValue', value=0.54132485/*ln (e-1)*/))) # init value is 1
# sharpened Softplus: 1/f ln(1+e^{f*beta})
# this behaves linear for weights around 1, yet guarantees positiveness
f = ConstantTensor (4, (1))
fInv = Reciprocal (f)
beta = fInv .* Log (Constants.One + Exp (f .* ParameterTensor ((inputDim), init='fixedValue', value=0.99537863/* 1/f*ln (e^f-1) */))) # init value is 1
TraceDense (h, what) = h # delete h and uncomment Trace to trace the beta values. They are a valuable indicator.
//Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=25, format=[ type = "real" ; transpose = false ; precisionFormat = ".6" ])
result = TraceDense ( beta, 'beta') .* x
].result
else x
else x
# and the same with a scalar stabilizer shared across all components
Stabilize (x, enabled=true) = if enabled then StabilizeElements (x, inputDim=1, enabled=true) else x
@ -494,37 +545,46 @@ Parameters =
RNNs =
[
# LSTMP -- LSTM function with projection and self-stabilization
# Projection it enabled by passing different values for outputDim and cellDim.
# Projection is enabled by passing different values for outputDim and cellDim.
# This is the stateless version that takes the previous state as an input.
# It returns a dictionary with three members: h and c, and dim=h.dim for convenience. prevState must have h and c.
LSTMP (outputDim, cellDim=outputDim, x, inputDim=x.dim, prevState, enableSelfStabilization=false) =
# This function also takes an optional auxiliary input, e.g. for suporting attention models.
LSTMP (outputDim, cellDim=outputDim, x, inputDim=x.dim, aux=Constants.None, auxDim=aux.dim, prevState, enableSelfStabilization=false) =
[
S(x) = Parameters.Stabilize (x, enabled=enableSelfStabilization)
# TODO: rename to just _
_privateInnards = [ // encapsulate the inner workings
_privateInnards = [ // encapsulate the inner workings
dh = prevState.h // previous values
dc = prevState.c
// parameter macros--these carry their own weight matrices
dhs = S(dh) // previous values, stabilized
dcs = S(dc)
# note: input does not get a stabilizer here, user is meant to do that outside
// parameter macros
# note: each invocation comes with its own set of weights
B() = Parameters.BiasParam (cellDim)
W() = Parameters.WeightParam (cellDim, inputDim) // input
A() = Parameters.WeightParam (cellDim, auxDim) // aux input
H() = Parameters.WeightParam (cellDim, outputDim) // hidden-to-hidden
C() = Parameters.DiagWeightParam (cellDim) // cell-to-hiddden (note: applied elementwise)
#inputDim1 = inputDim
#W(v) = Parameters.WeightParam (cellDim, inputDim) * Parameters.StabilizeElements (v, inputDim=inputDim1, enabled=enableSelfStabilization) // input-to-hidden
# ^^ element-wise stab, use if input is a concatenation; vv stab for entire matrix
W(v) = Parameters.WeightParam (cellDim, inputDim) * Parameters.Stabilize (v, enabled=enableSelfStabilization) // input-to-hidden
H(h) = Parameters.WeightParam (cellDim, outputDim) * Parameters.Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
C(c) = Parameters.DiagWeightParam (cellDim) .* Parameters.Stabilize (c, enabled=enableSelfStabilization) // cell-to-hiddden (note: applied elementwise)
# projected contribution from input(s) and bias
pin() = if Constants.IsNone (aux)
then B() + W() * x
else B() + W() * x + A() * aux
// note: the W(x) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
it = Sigmoid (W(x) + B() + H(dh) + C(dc)) // input gate(t)
bit = it .* Tanh (W(x) + (H(dh) + B())) // applied to tanh of input network
it = Sigmoid (pin() + H() * dhs + C() .* dcs) // input gate(t)
bit = it .* Tanh (pin() + H() * dhs) // applied to tanh of input network
ft = Sigmoid (W(x) + B() + H(dh) + C(dc)) // forget-me-not gate(t)
bft = ft .* dc // applied to cell(t-1)
ft = Sigmoid (pin() + H() * dhs + C() .* dcs) // forget-me-not gate(t)
bft = ft .* dc // applied to cell(t-1)
ct = bft + bit // c(t) is sum of both
ct = bft + bit // c(t) is sum of both
ot = Sigmoid (W(x) + B() + H(dh) + C(ct)) // output gate(t)
ht = ot .* Tanh (ct) // applied to tanh(cell(t))
ot = Sigmoid (pin() + H() * dhs + C() .* S(ct)) // output gate(t)
ht = ot .* Tanh (ct) // applied to tanh(cell(t))
]
# our return values
@ -532,51 +592,397 @@ RNNs =
h = if outputDim != cellDim // output/hidden state
then [ // project
Wmr = Parameters.WeightParam (outputDim, cellDim);
htp = Wmr * Parameters.Stabilize (_privateInnards.ht, enabled=enableSelfStabilization)
].htp // TODO: ^^ extend BS syntax to allow to say: then [ Wmr = WeightParam(outputDim, cellDim) ] in Wmr * Stabilize (...)
htp = Wmr * S(_privateInnards.ht)
].htp
else _privateInnards.ht // no projection
dim = outputDim
]
# helper function to delay h and c
# Callers can provide their own, e.g. useful for beam decoding.
PreviousHC (lstmState) = [
PreviousHC (lstmState, layerIndex=0) = [
h = Loop.Previous (lstmState.h) // hidden state(t-1)
c = Loop.Previous (lstmState.c) // cell(t-1)
dim = lstmState.dim
]
# pass previousHook=BS.RNNs.NextHC instead of PreviousHC to get a right-to-left recurrence
NextHC (lstmState) = [
NextHC (lstmState, layerIndex=0) = [
h = Loop.Next (lstmState.h) // hidden state(t-1)
c = Loop.Next (lstmState.c) // cell(t-1)
dim = lstmState.dim
]
NoAuxInputHook (input, lstmState) = Constants.None
# this implements a recurrent (stateful) LSTM with projection and self-stabilization
# It returns a record (h,c). To use its output, say .h
# By default, this is left-to-right. Pass previousHook=BS.RNNs.NextHC for a right-to-left model.
# TODO: remove the -2 once this works
RecurrentLSTMP = RecurrentLSTMP2
RecurrentLSTMP2 (outputDim, cellDim=outputDim.dim, x, inputDim=x.dim, previousHook=PreviousHC, enableSelfStabilization=false) =
RecurrentLSTMP (outputDim/*h.dim*/, cellDim=outputDim,
x, inputDim=x.dim,
previousHook=BS.RNNs.PreviousHC,
augmentInputHook=NoAuxInputHook, augmentInputDim=0,
layerIndex=0,
enableSelfStabilization=false) =
[
prevState = previousHook (lstmState)
inputDim1 = inputDim ; cellDim1 = cellDim ; enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
lstmState = BS.RNNs.LSTMP (outputDim, cellDim=cellDim1, x, inputDim=inputDim1, prevState, enableSelfStabilization=enableSelfStabilization1)
].lstmState // we return the state record (h,c)
enableSelfStabilization1 = enableSelfStabilization ; cellDim1 = cellDim ; inputDim1 = inputDim ; layerIndex1 = layerIndex # workaround
prevState = previousHook (lstmState, layerIndex=layerIndex1) # recurrent memory. E.g. Previous or Next, with or without initial state, beam reordering etc.
auxInput = augmentInputHook(x, prevState) # optionally augment input. Constants.None if none.
lstmState = BS.RNNs.LSTMP (outputDim, cellDim=cellDim1, x, inputDim=inputDim1, aux=auxInput, auxDim=augmentInputDim, prevState, enableSelfStabilization=enableSelfStabilization1)
].lstmState // that's the value we return
# a stack of recurrent LSTMs (unidirectional)
RecurrentLSTMPStack = RecurrentLSTMP2Stack # TODO: remove the -2 name once this works
RecurrentLSTMP2Stack (hiddenDims, cellDims=hiddenDims, input, inputDim=input.dim, previousHook=PreviousHC, enableSelfStabilization=false) = [
previousHook1 = previousHook ; useStabilizer = enableSelfStabilization
layers[i:0..Length (hiddenDims)-1] =
RecurrentLSTMP2 (hiddenDims[i], cellDim=cellDims[i],
if i == 0 then input else layers[i-1].h, inputDim=if i == 0 then inputDim else hiddenDims[i-1] /*TODO: layers[i-1].dim*/,
previousHook=previousHook1,
enableSelfStabilization=useStabilizer)
RecurrentLSTMPStack (layerDims, cellDims=layerDims,
input, inputDim=input.dim,
previousHook=PreviousHC,
augmentInputHook=NoAuxInputHook, augmentInputDim=0,
enableSelfStabilization=false) =
[
previousHook1 = previousHook ; useStabilizer = enableSelfStabilization ; augmentInputHook1 = augmentInputHook ; augmentInputDim1 = augmentInputDim
layers[i:0..Length (layerDims)-1] =
RecurrentLSTMP (layerDims[i], cellDim=cellDims[i],
if i == 0 then input else Parameters.Stabilize (layers[i-1].h, enabled=useStabilizer), inputDim=if i == 0 then inputDim else layers[i-1].dim,
previousHook=previousHook1,
augmentInputHook=if i == 0 then augmentInputHook1 else NoAuxInputHook, augmentInputDim=if i == 0 then augmentInputDim1 else 0,
layerIndex=i,
enableSelfStabilization=useStabilizer)
].layers
# a stack of recurrent LSTMs (bidirectional)
# TODO: Should we define layerDims as the total (sum of both forward and backward direction)?
RecurrentBirectionalLSTMPStack (layerDims, cellDims=layerDims, input, inputDim=input.dim, previousHook=PreviousHC, nextHook=NextHC, enableSelfStabilization=false) = [
previousHook1 = previousHook ; nextHook1 = nextHook ; useStabilizer = enableSelfStabilization
layers[i:0..Length (layerDims)-1] =
[
v = if i == 0 then input else Parameters.Stabilize (layers[i-1].h, enabled=useStabilizer)
vDim = if i == 0 then inputDim else layers[i-1].dim
fwd = RecurrentLSTMP (layerDims[i], cellDim=cellDims[i],
v, inputDim=vDim,
previousHook=previousHook1,
layerIndex=i,
enableSelfStabilization=useStabilizer)
bwd = RecurrentLSTMP (layerDims[i], cellDim=cellDims[i],
v, inputDim=vDim,
previousHook=nextHook1,
layerIndex=i,
enableSelfStabilization=useStabilizer)
h = Splice ((fwd.h : bwd.h), axis=1)
c = Splice ((fwd.c : bwd.c), axis=1)
dim = layerDims[i] * 2 # output dimension
]
].layers
]
##############################################################################
# Network operations
# sequence-to-sequence models
# This implements attention model and beam decoding.
##############################################################################
Seq2Seq =
[
# attention model
# The attention model is an additional input vector to the LSTM.
# Here, it is implemented by augmenting this vector to the regular input of the LSTM.
# The RecurrentLSTMP function does this inside through an optional lambda that the caller can pass in.
# This function creates such a lambda, which augments the input vector from a fixed-size attention window.
CreateAugmentWithFixedWindowAttentionHook (attentionDim, attentionSpan, decoderDynamicAxis, encoderOutput, enableSelfStabilization=false) =
[
# Parameters:
#   attentionDim       : dimension of the projected space in which attention energies are computed
#   attentionSpan      : number of encoder frames kept in the fixed rolling attention window
#   decoderDynamicAxis : dynamic axis of the decoder sequence that the window is broadcast to
#   encoderOutput      : record with fields .h (encoder hidden states) and .dim (their dimension)
# Returns: the AugmentInputHook lambda defined at the bottom (note the ].AugmentInputHook selector).
# attention (fixed rolling window)
attentionWindow = Sequences.PastValueWindow (attentionSpan, encoderOutput.h, axis=2) # BUGBUG: We should have this in axis=3 right away for beam search. Track this down.
S(x) = Parameters.Stabilize (x, enabled=enableSelfStabilization)
# project it for Tanh() expression
# expected to be [attentionDim x 1 x attentionSpan], where that 1 is the axis of the beam in beam decoding
projectedAttentionWindowBroadcast = [
W = Parameters.WeightParam (attentionDim, encoderOutput.dim)
# inject an additional singleton dimension at second axis, as a stand-in for the beam depth in decoding
InjectBeamDepth (node) = SplitDimension (node, /*axis*/1, /*N:*/1)
#projectedValue = Sequences.BroadcastSequenceAs (decoderDynamicAxis, InjectBeamDepth (W * attentionWindow.value)) # apply the projection columnwise to the attentionWindow tensor
projectedValue = if enableSelfStabilization # apply the projection columnwise to the attentionWindow tensor
then Sequences.BroadcastSequenceAs (decoderDynamicAxis, InjectBeamDepth (W * S(attentionWindow.value .* attentionWindow.valid))) # (mask invalid frames for stabilizer)
else Sequences.BroadcastSequenceAs (decoderDynamicAxis, InjectBeamDepth (W * attentionWindow.value))
value = Sequences.BroadcastSequenceAs (decoderDynamicAxis, InjectBeamDepth ( attentionWindow.value))
valid = Sequences.BroadcastSequenceAs (decoderDynamicAxis, InjectBeamDepth ( attentionWindow.valid))
dim = encoderOutput.dim
]
# the return value of this function is this lambda, which gets passed to the RecurrentLSTMP() function as the augmentInputHook parameter
# input is unused here; attention depends only on the previous decoder state (prevState) and the window above.
AugmentInputHook (input, prevState) =
[
# compute additional hidden state from attention
outputDim = prevState.dim
W = Parameters.WeightParam (attentionDim, outputDim)
projectedH = W * S(prevState.h) # [outputDim] or [outputDim x D] in beam search
tanHOut = Tanh (projectedAttentionWindowBroadcast.projectedValue + projectedH) # [attentionDim x beamDepth x attentionSpan]
# You can enable (uncomment) these Trace macros to enable tracing of the attention weights, which is a useful indicator.
TraceDense (h, what) = h
//Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=25, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ])
TraceDenseTransposed (h, what) = h
//Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=25, format=[ type = "real" ; transpose = true ; precisionFormat = ".4" ])
v = TraceDenseTransposed( Parameters.WeightParam (1, attentionDim) ,'v') # [1 x attentionDim]
u = v * S(tanHOut .* projectedAttentionWindowBroadcast.valid) # [1 x beamDepth x attentionSpan]
# ^^ mask 'v' for purpose of stabilization; TODO: don't do that if no stabilization
uValid = u + Log (projectedAttentionWindowBroadcast.valid) # [1 x beamDepth x attentionSpan]  (Log(0) pushes invalid frames to -INF before Softmax)
attentionWeights = Softmax (uValid, axis=3) # [1 x beamDepth x attentionSpan]
weightedAttentionWindow = projectedAttentionWindowBroadcast.value .* TraceDense( attentionWeights ,'weights') # [encoderHiddenDim x beamDepth x attentionSpan]
# TODO: use ReduceSum:
# this is the auxiliary input to the LSTMP function
weightedAttentionAverage = S(Times (weightedAttentionWindow, BS.Constants.OnesTensor (attentionSpan), outputRank=2)) # [encoderHiddenDim x beamDepth]
].weightedAttentionAverage
].AugmentInputHook
# helper macro that extracts top D hypotheses from a 2D tensor
# input: scores[w,n] w = word index, n = hyp index in beam (n=0 is the best one)
# output: [w,n1,n2] n1 = input hyp index (prev top N); n2 = output hyp index (new top N)
# e.g. 4 words, beam 3; view this as 3 [4x3] planes "drawn" 3-dimensionally, with depth being the 3rd tensor index
GetTopNTensor (D, scores) = [
# Extracts the top D entries of 'scores' by peeling them off one at a time:
# each recursion step takes the Hardmax (a one-hot over (w,n)) of what is left,
# then masks that winner out with a large negative value so the next step
# finds the runner-up. The D one-hot planes are then spliced along axis 3.
# recurse over up to D elements
# In each recursion:
# - pick the best over (w,n)
# - subtract it out from scores
recursion[n:0..D-1] =
[
curBestScores = if n == 0 # scores excluding paths better than rank n
then scores # top: just the path scores
else recursion[n - 1].nextBestScores # next: path scores after removing all we already got
best = Hardmax (curBestScores) # best = one-hot over (w,n)
nextBestScores = curBestScores + Constant (-1e30) .* best # set the ones we've already got to -INF
# TODO: use proper -INF; e.g. -1/0 in BS. Needs to be tested thoroughly.
]
# splice them together into a single tensor
asArray[n:0..D-1] = recursion[n].best # this is a BS array consisting only of the 'best' field ('from r in recursion select r.best')
spliced = Splice (axis = 3, asArray) # convert BS array index n to tensor index n1
].spliced
# Create a greedy decoder model from an existing trained model.
# The input model is expected to have these nodes:
# - decoderHistoryFromOutput: the decoding output of a time step (Hardmax (outputProbability))
# - decoderHistoryHook: a node that is the word sequence that will be used as the history for the next time step
# In training, this is the label sequence.
# In greedy decoding, it must be decoderHistoryHook = decoderHistoryFromOutput
# - z: scaled log prediction probability --TODO: rename this: scoreSequence = Pass (z)
# - inputSequence
# - labelSequence (only passed through for scoring, not used in decoding)
# The returned model has the following one-hot outputs:
# - decodedSequence --TODO: currently decodeOut; rename this
# - inputSequence
# - labelSequence
# To decode greedily, in "write" or "eval" specify the model as:
# BrainScriptNetworkBuilder = (BS.S2S.GreedySequenceDecoderFrom (BS.Network.Load ("$decodeModelPath$")))
GreedySequenceDecoderFrom (modelAsTrained) = [
# Turn a trained seq2seq model into a greedy decoder: the Hardmax of the output
# distribution is fed back as the history for the next time step (instead of the
# ground-truth labels used during training). See the comment block above for the
# node names the input model is expected to expose.
scoreSequence = modelAsTrained.z
decodeOut = Pass ( Hardmax (scoreSequence), tag='output')
inputsOut = Pass (modelAsTrained.inputSequence, tag='output')
labelsOut = Pass (modelAsTrained.labelSequence, tag='output')
# rewire the history input to come from the decoder's own output, and add the three output roots
model = BS.Network.Edit (modelAsTrained,
#BS.Network.Editing.ReplaceLinksToNode (modelAsTrained.decoderInput/*delayedDecoderFeedback*/, delayedDecoderFeedback),
BS.Network.Editing.ReplaceLinksToNode (modelAsTrained.decoderHistoryHook, modelAsTrained.decoderHistoryFromOutput),
decodeOut : inputsOut : labelsOut)
].model
# turning a regular LSTM to a top-N beam-search decoder:
# - add a depth axis of dimension N to all nodes inside the decoder loop
# - only needs the init signal for PastValue to be that
# - h and c must be shuffled versions of their PastValue
# - since what are the top N in one time step is not the top N in the next
# - reshuffling and adding depth to the init signal can be done at the same place
# - decoder output must determine the top N and a reshuffling matrix for h and c
# - the current Hardmax needs to be replaced by something that outputs these (output depth N)
# - we get a N^2 depth: [V x (input set) x (top N output hypos)]
# - reshuffling matrix is reduction over V (multiply with row of V ones) plus possibly a transposition
# - we need an accumulated path score
# - start value constructed by stacking a 0 and N-1 -INF
# - for testing, we can output the current best in each step
# - that's a Slice()
# - traceback is a right-to-left recurrence
# - output best hypo conditioned on the path (it is already known)
# beam search of width 'beamDepth'
BeamSearchSequenceDecoderFrom (modelAsTrained, beamDepth) = [
# Turn a trained seq2seq model into a top-N beam-search decoder of width 'beamDepth'.
# Dimensions used in comments below: V = vocabulary size, D/Dprev/Dnew = beam depth
# (previous/new time step's hypothesis rank). The decoder expands all D hypotheses,
# keeps the top D expanded paths, reorders the LSTM state accordingly via
# beamSearchReorderHook, and finally recovers the best path with a right-to-left
# traceback recurrence. The annotated ASCII diagrams further down illustrate each tensor.
scoreSequence = modelAsTrained.z
vocabSize = scoreSequence.dim
# TODO: use ReduceSum
ReduceAxis (axisDim, x, axis=1) = # unfortunately, we must feed in the dimension of the axis, it can't be inferred
if axis == 1 then Times (Constants.OnesTensor (axisDim), x, outputRank=0)
else if axis == 2 then ReduceAxis (axisDim, TransposeDimensions (x, 1, 2), axis=1)
else Fail("ReduceAxis: Only supports axes 1 and 2.")
# === BEGIN DECODER ===
# constants for initial score and final traceback
initialPathScores = FirstAndOther (0, LOGZERO, beamDepth, axis = 2) # [1 x D]: [ 0, -INF, -INF, -INF, ... ]
finalHyp = FirstAndOther (1, 0, beamDepth, axis = 1) # [D] the final token is the top-scoring hypothesis, that is, hyp[0]
# path expansion of the D hypotheses that were best in previous time step (ordered as in previous time step)
logLLs = Columnwise (LogSoftmax, beamDepth, scoreSequence) # [V x Dprev] log P(w|hist)
expandedPathScores = logLLs + Boolean.If (Loop.IsFirst (logLLs), initialPathScores, Loop.Previous (tokens.score)) # [V x Dprev] log (P(w|hist) * P(hist)) for all top D hypotheses
# determine top D of expanded paths
topPaths = GetTopNTensor (beamDepth, expandedPathScores) # [V x Dprev] -> [V x Dprev x Dnew]
topPathScores = topPaths .* expandedPathScores # [V x Dprev x Dnew]
# form new decoding token, by reducing topPaths(Scores) along relevant dimensions
tokens = [ # [. x Dnew]
from = ReduceAxis (axis=1, vocabSize, topPaths) # [Dprev x Dnew], reduced over V
word = ReduceAxis (axis=2, beamDepth, topPaths) # [V x Dnew], reduced over Dprev
score = Constants.OnesTensor (1/*output dim*/ : /*reduction dims: */vocabSize : beamDepth/*Dprev*/) * topPathScores # [1 x Dnew], reduced over [V x Dprev] and inserted a '1'
]
# network feedback for next time step
# BUGBUG: Need to import EmbedLabels functionality from models
decoderFeedback = /*EmbedLabels*/ (tokens.word) # [embeddingDim x Dnew]
delayedDecoderFeedback = Boolean.If (Loop.IsFirst (labelSentenceStartEmbeddedScattered), labelSentenceStartEmbeddedScattered, Loop.Previous (decoderFeedback))
# final traceback
traceback = Boolean.If (Loop.IsLast (modelAsTrained.labelSentenceStartEmbeddedScattered/*tokens.from*/), finalHyp, Loop.Next (tokens.from * traceback)) # [D] one-hot, multiplying tokens.from from the left will select another one-hot row of tokens.from
decodeHyp = Times (topPaths, traceback, outputRank=2) # [V x Dprev] 2D one-hot, selected the best hyp according to traceback
decode = decodeHyp * Constants.OnesTensor (beamDepth) # [V] reduces over Dprev -> 1D one-hot
# TODO: Can this be done in one ^^ go?
# === END DECODER ===
# propagate LSTM state to the right top-N rank given where that rank came from in the previous time step
# PropagateTopN:
# tokens.from: [Dprev, Dnew]
# v--------- best came from input hyp[1]
# v------- second best came from input hyp[0]
# v----- third best came from input hyp[2]
# 0 1 0
# 1 0 0
# 0 0 1
# tokens.from[:,n] one-hot encodes the best predecessor at top-N rank n
# each column is a one-hot vector
# multiplying with such a column from the right will select the column represented by the one-hot value
# logLLs: get decoder log likelihoods
# initialPathScores: decoder start token: 0 for first hyp, -INF for the others
LOGZERO = -1e30
# expandedPathScores: path expansion, [V x 1] + [1 x D] -> [V x D]
# topPaths:
# +-----+
# |0 0 0|
# |0 0 0|-+
# |0 1 0|0| means word[2] in input hyp[1] was the best
# |0 0 0|0|-+
# +-----+0|0|
# |1 0 0|0| means word[3] in input hyp[0] was the second best
# +-----+1| means word[2] in input hyp[2] was the third best
# |0 0 0|
# +-----+
# tokens.word:
#tokens.word = ReduceSum (axis=2, topPaths) # TODO: add an axis parameter to SumColumnElements()
# +-+
# |0|
# |0|-+
# |1|0| means word[2] in input hyp[1] was the best
# |0|0|-+
# +-+0|0|
# |1|0| means word[3] in input hyp[0] was the second best
# +-+1| means word[2] in input hyp[2] was the third best
# |0|
# +-+
# tokens.from:
# before dropping the first dimension: [V x Dprev x Dnew]
# +-----+
# |0 1 0| means input hyp[1] gave rise to the best
# +-----+-+
# |1 0 0| means input hyp[0] gave rise to second best
# +-----+-+
# |0 0 1| means input hyp[2] gave rise to third best
# +-----+
# after: [Dprev x Dnew] e.g. "0 1 0" goes into first column, vertically
# v--------- best came from input hyp[1]
# v------- second best came from input hyp[0]
# v----- third best came from input hyp[2]
# 0 1 0
# 1 0 0
# 0 0 1
# tokens.from[:,n] one-hot encodes the best predecessor at top-N rank n
# topPathScores:
# +-----+
# |0 0 0|
# |0 0 0|-+
# |0 x 0|0| x denotes the accumulated path score max_w P(w|hyp[1])
# |0 0 0|0|-+
# +-----+0|0|
# |y 0 0|0| y denotes the accumulated path score max_w P(w|hyp[0])
# +-----+z| z denotes the accumulated path score max_w P(w|hyp[2])
# |0 0 0|
# +-----+
# traceback:
# last state: take Hardmax over tokens.score
# previous states: multiply with respective tokens.from matrix
# -> hyp index for every time step
# then finally use that to select the actual output TODO: That's a sample-wise matrix product between two sequences!!!
# TODO: condition must be 1-dim, not 2-dim tensor, so we use labelSentenceStartEmbeddedScattered instead of tokens.from
# +-+
# |0|
# |1| means at this time step, hyp[1] was the best globally
# |0|
# +-+
# decode: and the actual decoding output
# This is the one to output (top sentence-level hypothesis after traceback).
# traceback : [Dnew]
# topPaths : [V x Dprev x Dnew]
# +-----+
# |0 0 0|
# |0 0 0|-+
# |0 1 0|0| means word[2] in input hyp[1] was the best
# |0 0 0|0|-+
# +-----+0|0|
# |1 0 0|0| means word[3] in input hyp[0] was the second best
# +-----+1| means word[2] in input hyp[2] was the third best
# |0 0 0|
# +-----+
# helper macros --> move to BS.core.bs
# Columnwise: apply f to each of the beamDepth columns of z separately, then re-splice.
Columnwise (f, beamDepth, z) = # TODO: Takes LogSoftmax over axis=1. it is more tricky to do this over arbitrary axes
[
cols[d:0..beamDepth-1] = f (Slice (d, d+1, z, axis=2) /*[:,d]*/ )
out = Splice (cols, axis=2)
].out
# FirstAndOther: build a length-N constant vector [firstVal, otherVals, otherVals, ...] along the given axis.
FirstAndOther (firstVal, otherVals, N, axis = 1) = if N == 1 then ConstantTensor (firstVal, (1)) else [
axis1 = axis # TODO: Is this really necessary? Why? Then we need the syntax axis = ^.axis or ^axis
out = if axis == 1 # maybe this can be unified or pushed into Splice?
then RowStack (ConstantTensor (firstVal, (1)) : ConstantTensor (otherVals, (N -1))) # col vector: [ 1; 0; 0; 0 ... ]
else Splice (Constant (firstVal) : ConstantTensor (otherVals, (1 : N -1)), axis = axis1 /*, axis*/) # row vector: [ 0, -INF, -INF, -INF, ... ]
].out
# rewire the trained network: reorder LSTM state by beam rank, feed the beam's words back as history,
# and add the three output roots
model = BS.Network.Edit (modelAsTrained,
(
BS.Network.Editing.ReplaceLinksToNode (modelAsTrained.beamSearchReorderHook, tokens.from) : # reorder LSTM states
BS.Network.Editing.ReplaceLinksToNode (modelAsTrained.decoderHistoryHook, decoderFeedback) # feed decoder output back in
),
(inputsOut : labelsOut : decodeOut)) # additional roots
inputsOut = Pass (modelAsTrained.inputSequence, tag='output')
labelsOut = Pass (modelAsTrained.labelSequence, tag='output')
decodeOut = Pass (decode, tag='output')
].model
]
##############################################################################
# Network-level operations
# These operations will have undefined behavior for input values != 0 or 1.
##############################################################################

Просмотреть файл

@ -707,14 +707,14 @@ int wmain1(int argc, wchar_t* argv[]) // called from wmain which is a wrapper th
catch (const ScriptableObjects::ScriptingException& err)
{
fprintf(stderr, "\n");
LOGPRINTF(stderr, "EXCEPTION occurred: %s\n", err.what());
err.PrintError();
err.PrintError(ProgressTracing::GetTimeStampPrefix() + L"EXCEPTION occurred");
return EXIT_FAILURE;
}
catch (const IExceptionWithCallStackBase& err)
{
fprintf(stderr, "\n");
LOGPRINTF(stderr, "EXCEPTION occurred: %s\n%s", dynamic_cast<const std::exception&>(err).what(), err.CallStack());
fprintf(stderr, "%s", err.CallStack());
LOGPRINTF(stderr, "EXCEPTION occurred: %s\n", dynamic_cast<const std::exception&>(err).what());
return EXIT_FAILURE;
}
catch (const std::exception& err)

Просмотреть файл

@ -54,7 +54,7 @@
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\ActionsLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Readers\ReaderLib;$(SolutionDir)Source\ActionsLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(MSMPI_LIB64);$(OutDir);$(NvmlLibPath)</AdditionalLibraryDirectories>
@ -146,6 +146,7 @@
<ClInclude Include="..\Common\Include\Basics.h" />
<ClInclude Include="..\Common\Include\BestGpu.h" />
<ClInclude Include="..\Common\Include\DataReader.h" />
<ClInclude Include="..\Common\Include\CompositeDataReader.h" />
<ClInclude Include="..\Common\Include\ExceptionWithCallStack.h" />
<ClInclude Include="..\Common\Include\StringUtil.h" />
<ClInclude Include="..\Common\Include\TensorShape.h" />
@ -164,6 +165,20 @@
<ClInclude Include="..\Math\Matrix.h" />
<ClInclude Include="..\ComputationNetworkLib\PreComputeNodes.h" />
<ClInclude Include="..\ComputationNetworkLib\MatrixPool.h" />
<ClInclude Include="..\Readers\ReaderLib\BlockRandomizer.h" />
<ClInclude Include="..\Readers\ReaderLib\Bundler.h" />
<ClInclude Include="..\Readers\ReaderLib\ChunkRandomizer.h" />
<ClInclude Include="..\Readers\ReaderLib\DataDeserializer.h" />
<ClInclude Include="..\Readers\ReaderLib\MemoryProvider.h" />
<ClInclude Include="..\Readers\ReaderLib\NoRandomizer.h" />
<ClInclude Include="..\Readers\ReaderLib\Packer.h" />
<ClInclude Include="..\Readers\ReaderLib\Reader.h" />
<ClInclude Include="..\Readers\ReaderLib\SampleModePacker.h" />
<ClInclude Include="..\Readers\ReaderLib\SequencePacker.h" />
<ClInclude Include="..\Readers\ReaderLib\SequenceRandomizer.h" />
<ClInclude Include="..\Readers\ReaderLib\StringToIdMap.h" />
<ClInclude Include="..\Readers\ReaderLib\Transformer.h" />
<ClInclude Include="..\Readers\ReaderLib\TransformerBase.h" />
<ClInclude Include="..\SGDLib\DataReaderHelpers.h" />
<ClInclude Include="..\SGDLib\SGD.h" />
<ClInclude Include="..\SGDLib\SimpleEvaluator.h" />

Просмотреть файл

@ -47,6 +47,9 @@
<ClCompile Include="..\Common\ExceptionWithCallStack.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\Common\CompositeDataReader.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Common\Include\fileutil.h">
@ -166,6 +169,51 @@
<ClInclude Include="..\Common\Include\ExceptionWithCallStack.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\BlockRandomizer.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\Bundler.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\ChunkRandomizer.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\DataDeserializer.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\MemoryProvider.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\NoRandomizer.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\Packer.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\Reader.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\SampleModePacker.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\SequencePacker.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\SequenceRandomizer.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\StringToIdMap.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\Transformer.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Readers\ReaderLib\TransformerBase.h">
<Filter>from ReaderLib</Filter>
</ClInclude>
<ClInclude Include="..\Common\Include\CompositeDataReader.h">
<Filter>Common\Include</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Text Include="modelEditor.txt">
@ -224,6 +272,9 @@
<Filter Include="BrainScript\CNTKCoreLib">
<UniqueIdentifier>{899f31fa-5906-4485-8875-14ad2c43ed8f}</UniqueIdentifier>
</Filter>
<Filter Include="from ReaderLib">
<UniqueIdentifier>{28bc457a-d2f4-4f42-a9aa-89f22e909ab0}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include="prebuild.bat">

Просмотреть файл

@ -89,6 +89,8 @@ DataReader::DataReader(const ConfigRecordType& config)
string precision = config(L"precision", "float");
bool hasMultipleReaders = config.Exists(L"readers");
// In case when deserializers are specified, use the new logic to compose them.
bool hasDeserializers = config.Exists(L"deserializers");
if (hasMultipleReaders)
{
vector<wstring> ioNames = config(L"readers", ConfigRecordType::Array(stringargvector()));
@ -103,6 +105,16 @@ DataReader::DataReader(const ConfigRecordType& config)
getReaderProc(&m_dataReaders[ioName]); // instantiates the reader with the default constructor (no config processed at this point)
}
}
else if (hasDeserializers)
{
// Creating Composite Data Reader that allow to combine deserializers.
// This should be changed to link statically when SGD uses the new interfaces.
wstring ioName = L"ioName";
GetReaderProc getReaderProc = (GetReaderProc)Plugin::Load(config(L"readerType", L"CompositeDataReader"), GetReaderName(precision));
m_ioNames.push_back(ioName);
assert(getReaderProc != nullptr);
getReaderProc(&m_dataReaders[ioName]);
}
else // legacy
{
wstring ioName = L"ioName";
@ -202,7 +214,7 @@ bool DataReader::GetMinibatch(StreamMinibatchInputs& matrices)
if (nbr > 0)
m_dataReaders[m_ioNames[i]]->SetNumParallelSequences(nbr); // the first one determines the param of all others --TODO: This is flimsy.
bRet &= m_dataReaders[m_ioNames[i]]->GetMinibatch(matrices);
size_t thisNbr = m_dataReaders[m_ioNames[i]]->GetNumParallelSequences();
size_t thisNbr = m_dataReaders[m_ioNames[i]]->GetNumParallelSequencesForFixingBPTTMode();
if (nbr == 0)
nbr = thisNbr;
else if (thisNbr != nbr)
@ -235,15 +247,15 @@ bool DataReader::GetHmmData(msra::asr::simplesenonehmm* hmm)
return bRet;
}
size_t DataReader::GetNumParallelSequences()
size_t DataReader::GetNumParallelSequencesForFixingBPTTMode()
{
size_t nNbr = 0;
for (size_t i = 0; i < m_ioNames.size(); i++)
{
IDataReader* ptr = m_dataReaders[m_ioNames[i]];
if (nNbr == 0)
nNbr = ptr->GetNumParallelSequences();
else if (nNbr != ptr->GetNumParallelSequences())
nNbr = ptr->GetNumParallelSequencesForFixingBPTTMode();
else if (nNbr != ptr->GetNumParallelSequencesForFixingBPTTMode())
LogicError("GetNumParallelSequences: number of slices in each minibatch not consistent for these streams");
}
return nNbr;

Просмотреть файл

@ -168,7 +168,10 @@ public:
{
NOT_IMPLEMENTED;
};
virtual size_t GetNumParallelSequences() = 0;
// TODO: Should be removed when BPTT follows proper minibatch size.
virtual size_t GetNumParallelSequencesForFixingBPTTMode() = 0;
//virtual int GetSentenceEndIdFromOutputLabel() { return -1; }
virtual void SetNumParallelSequences(const size_t sz)
{
@ -337,7 +340,7 @@ public:
virtual bool GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::latticepair>>& latticeinput, vector<size_t>& uids, vector<size_t>& boundaries, vector<size_t>& extrauttmap);
virtual bool GetHmmData(msra::asr::simplesenonehmm* hmm);
size_t GetNumParallelSequences();
size_t GetNumParallelSequencesForFixingBPTTMode();
//int GetSentenceEndIdFromOutputLabel();
//bool RequireSentenceSeg() const override;

Просмотреть файл

@ -28,6 +28,32 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// Base interface shared by the two model-evaluation interfaces declared below
// (the legacy IEvaluateModel and the extended IEvaluateModelExtended):
// covers initialization, network creation, and resource teardown.
template <typename ElemType>
class IEvaluateModelBase
{
public:
//
// Load a model based on configuration. The syntax is the same as when calling the cntk executable.
// e.g. "modelFile=model.dat deviceId=0".
// numCPUThreads can be used to set the thread count of BLAS.
//
virtual void Init(const std::string& config) = 0;
//
// Create a network based on an (NDL) network description.
//
virtual void CreateNetwork(const std::string& networkDescription) = 0;
//
// Free resources
//
virtual void Destroy() = 0;
};
// ------------------------------------------------------------------------
// Basic (legacy) interface
// ------------------------------------------------------------------------
enum NodeGroup
{
nodeInput, // an input node
@ -39,33 +65,54 @@ enum NodeGroup
// NOTICE: This interface is a public interface for evaluating models in CNTK.
// Changes to this interface may affect other projects, such as Argon and LatGen,
// and therefore need to be communicated with such groups.
template <class ElemType>
class IEvaluateModel // Evaluate Model Interface
template <typename ElemType>
class IEvaluateModel : public IEvaluateModelBase<ElemType> // Evaluate Model Interface
{
public:
virtual void Init(const std::string& config) = 0;
virtual void Destroy() = 0;
virtual void CreateNetwork(const std::string& networkDescription) = 0;
//
// Retrieves the (flattened) dimensions
//
virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup) = 0;
//
// Allocate resources for a particular output.
//
virtual void StartEvaluateMinibatchLoop(const std::wstring& outputNodeName) = 0;
//
// Evaluate a model in frame mode. This does not support dynamic axes or sparse input data.
// Given a feature vector of dimension d, the inputs may contain n * d elements. The output will then be computed
// for n samples.
// inputs - map from node name to array of input tensors, flattened to vector
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will
// happen during evaluation
//
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs) = 0;
//
// Evaluate - Evaluate using the network without input and provide the outputs
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will
// happen during evaluation
//
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs) = 0;
virtual void ResetState() = 0;
};
// GetEval - get a evaluator type from the DLL
// since we have 2 evaluator types based on template parameters, exposes 2 exports
// could be done directly with the templated name, but that requires mangled C++ names
template <class ElemType>
template <typename ElemType>
void EVAL_API GetEval(IEvaluateModel<ElemType>** peval);
extern "C" EVAL_API void GetEvalF(IEvaluateModel<float>** peval);
extern "C" EVAL_API void GetEvalD(IEvaluateModel<double>** peval);
// Data Reader class
// interface for clients of the Data Reader
// mirrors the IEvaluateModel interface, except the Init method is private (use the constructor)
template <class ElemType>
template <typename ElemType>
class Eval : public IEvaluateModel<ElemType>, protected Plugin
{
private:
@ -84,6 +131,7 @@ public:
// modelPath=c:\models\model.dnn (model path, if not specified, must call LoadModel() method before Evaluate()
// minibatchSize=1024 (minibatch size used during evaluation if < passed data size)
Eval(const std::string& config);
virtual ~Eval();
// CreateNetwork - create a network based on the network description
@ -101,14 +149,146 @@ public:
// Evaluate - Evaluate using the model with the given inputs and outputs
// inputs - map from node name to input vector
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will
// happen during evaluation
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs);
// Evaluate - Evaluate using the network without input, and provide the outputs
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will
// happen during evaluation
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs);
virtual void Init(const std::string& config);
virtual void ResetState();
};
// ------------------------------------------------------------------------
// Extended interface
// ------------------------------------------------------------------------
//
// A buffer to keep data for all samples in a (variable length) sequence
// from a single input or output.
// This is used for both dense and sparse data.
//
template<typename ElemType>
struct VariableBuffer
{
// Number of samples (entries along the dynamic axis) currently held in this buffer.
size_t m_numberOfSamples = 0;
//
// All elements of a sequence, concatenated.
//
std::vector<ElemType> m_buffer;
// In case of sparse data, the following is also used. Otherwise, the
// contents are ignored.
// E.g. a sequence of three sparse vectors with 2 / 4 / 2 non-zero values
// could be represented as the following:
// colIdx: 0 2 6 8
// v v v v
// indices 1 3 2 3 5 6 2 7
// buffer 0 1 2 3 4 5 6 7
//
// For every element in buffer, an entry in this array gives its position.
// For every vector the entries must be ascending.
//
std::vector<int> m_indices;
//
// Contains numberOfsamples + 1 indices into the buffer. The first entry
// is always 0. The last entry points after the last element.
// See http://docs.nvidia.com/cuda/cusparse/#compressed-sparse-column-format-csc
//
std::vector<int> m_colIndices;
};
//
// Meta data
//
// Meta data describing a single input or output variable: name, element type,
// dense/sparse storage, flattened element count, and its dynamic axis.
struct VariableLayout
{
// Element type of the variable's data.
enum DataType
{
Float32,
Float64
};
// How the variable's data is laid out in a VariableBuffer.
enum StorageType
{
Undetermined,
Dense,
Sparse,
};
// Name of the input
std::wstring m_name;
DataType m_dataType;
StorageType m_storageType;
// Dimension of the tensor, flattened to 1 dimension, for one entry on the dynamic axis.
// E.g. for a tensor [2,3,*] this would be 6.
int m_numElements;
// Name of the axis, potentially shared between inputs. For any two inputs sharing the same
// dynamic axis, the sequence cardinality must be the same.
std::wstring m_dynamicAxisName;
};
template <typename ElemType>
using Variables = std::vector<VariableBuffer<ElemType>>;
using VariableSchema = std::vector<VariableLayout>;
//
// Extended interface, allowing for sparse input.
//
template <typename ElemType>
class IEvaluateModelExtended : public IEvaluateModelBase<ElemType>
{
public:
//
// GetOutputSchema - retrieve information about tensor shapes and memory layout of the outputs for this
// model.
//
virtual VariableSchema GetOutputSchema() const = 0;
//
// Allocate internal state for calling ForwardPass(). The call restricts the network (inputs and outputs)
// to the functions represented by the output name.
//
virtual void StartForwardEvaluation(std::vector<std::wstring> outputs) = 0;
//
// GetVariableLayout - retrieve information about tensor shapes and memory layout of inputs necessary for a
// particular output. By default this returns all available inputs. After StartForwardEvaluation(), this
// returns all the inputs necessary to compute the outputs.
//
virtual VariableSchema GetInputSchema() const = 0;
//
// Evaluate - Evaluate (perform a forward pass for) a single unit using the model with the given inputs and
// outputs.
// The layout and shape of the data in inputs vector must match the schema returned by GetInputLayouts.
// This method is not reentrant, as the forward pass keeps internal state.
// outputId - output to compute values for. See GetOutputLayouts()
// inputs - vector of input buffers, one for every input as given by GetInputLayouts()
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing
// will happen during evaluation.
// Called after StartForwardEvaluation()
// NOTE(review): the parameter is named 'output' (singular) but holds one buffer per
// output variable, per the comment above — consider renaming for clarity.
//
virtual void ForwardPass(const Variables<ElemType>& inputs, Variables<ElemType>& output) = 0;
};
template <typename ElemType>
void EVAL_API GetEvalExtended(IEvaluateModelExtended<ElemType>** peval);
extern "C" EVAL_API void GetEvalExtendedF(IEvaluateModelExtended<float>** peval);
extern "C" EVAL_API void GetEvalExtendedD(IEvaluateModelExtended<double>** peval);
} } }

Просмотреть файл

@ -4,25 +4,25 @@
//
#pragma once
#include "Basics.h"
#include <chrono>
#include "TimerUtility.h"
#include <string>
namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: make this proper C++ functions with variadic templates and a name that reflects their difference to fprintf(stderr) which already implies printing to log
// If the Tracing flag is set, print out a timestamp with no new line at the end
#define PREPENDTS(stream) \
do \
{ \
if (ProgressTracing::GetTimestampingFlag()) \
{ \
std::time_t tt = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); \
char mbstr[30]; \
if (std::strftime(mbstr, sizeof(mbstr), "%m/%d/%Y %H:%M:%S", std::localtime(&tt))) \
fprintf(stream, "%s: ", mbstr); \
char mbstr[30]; \
fprintf(stream, "%s: ", ProgressTracing::Timestamp(mbstr)); \
} \
} while(0)
// TODO: make this proper C++ functions with variadic templates and a name that reflects their difference to fprintf(stderr) which already implies printing to log
// Print out a log message. If the Tracing flag is set, prepend with a timestamp
#define LOGPRINTF(stream, ...) \
do \
@ -80,6 +80,22 @@ public:
// TODO: timestampFlag or timestampingFlag? (Or timeStampFlag?)
}
template<unsigned int N>
static const char* Timestamp(char(&buf)[N])
{
std::time_t tt = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
if (!std::strftime(buf, _countof(buf), "%m/%d/%Y %H:%M:%S", std::localtime(&tt)))
LogicError("Timestamp: Buffer too small.");
return buf;
}
// helper to return a time-stamp prefix if time-stamping enabled, complete with ': ' at its end
static std::wstring GetTimeStampPrefix()
{
char mbstr[30];
return GetTimestampingFlag() ? msra::strfun::wstrprintf(L"%s: ", Timestamp(mbstr)) : L"";
}
static void SetTracingFlag()
{
auto& us = GetStaticInstance();
@ -167,4 +183,5 @@ public:
return newNumItersSinceLastPrintOfProgress;
}
};
} } }
}}}

Просмотреть файл

@ -25,11 +25,11 @@ class ScriptingException : public runtime_error
{
public:
template <typename M>
ScriptingException(const M &msg)
: runtime_error(msg)
ScriptingException(const M &msg) :
runtime_error(msg)
{
}
virtual void PrintError() const = 0;
virtual void PrintError(const std::wstring& linePrefix) const = 0;
};
// -----------------------------------------------------------------------

Просмотреть файл

@ -17,7 +17,6 @@
#include "latticestorage.h"
#include "simple_checked_arrays.h"
#include "fileutil.h"
#include <stdint.h>
#include <vector>
#include <string>
#include <unordered_map>

Просмотреть файл

@ -491,27 +491,30 @@ void ComputationNetwork::CollectInputAndLearnableParametersRec(const Computation
}
template <class ElemType>
/*static*/ void ComputationNetwork::SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, unsigned long& dropOutSeed)
/*static*/ void ComputationNetwork::SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase)
{
list<ComputationNodeBasePtr> dropoutNodes = net->GetNodesWithType(OperationNameOf(DropoutNode), criterionNode);
if (dropoutRate != prevDropoutRate)
{
fprintf(stderr, "Setting dropout rate to %.8g.\n", dropoutRate);
// TODO: Change this to use an interface that is independent of <ElemType>.
list<ComputationNodeBasePtr> dropoutNodes = net->GetNodesWithType(OperationNameOf(DropoutNode), criterionNode);
if (dropoutNodes.size() == 0 && dropoutRate > 0)
fprintf(stderr, "WARNING: there is no dropout node.\n");
else
{
for (auto& nodeIter: dropoutNodes)
{
auto node = dynamic_pointer_cast<DropoutNode<ElemType>>(nodeIter);
node->SetDropoutRate(dropoutRate);
node->SetRandomSeed(dropOutSeed++);
}
}
prevDropoutRate = dropoutRate;
fprintf(stderr, "WARNING: Attempting to set dropout rate, but there is no dropout node in the network.\n");
}
// Each dropout node gets a distinct seed. The actual seed for each dropout node is computed as follows:
// seed = (((parallelWorkerIdx * maxEpochs) + currentEpochNum) /*i.e. randSeedBase*/ * dropoutNodes.size()) + dropoutNodeIdx
size_t randSeed = randSeedBase * dropoutNodes.size();
for (auto& nodeIter : dropoutNodes)
{
auto node = dynamic_pointer_cast<DropoutNode<ElemType>>(nodeIter);
if (dropoutRate != prevDropoutRate)
node->SetDropoutRate(dropoutRate);
node->SetRandomSeed(randSeed);
randSeed++;
}
prevDropoutRate = dropoutRate;
}
template <class ElemType>
@ -1441,7 +1444,7 @@ template void ComputationNetwork::InitLearnableParameters<float>(const Computati
template void ComputationNetwork::Read<float>(const wstring& fileName);
template void ComputationNetwork::ReadPersistableParameters<float>(File& fstream, bool create);
template void ComputationNetwork::PerformSVDecomposition<float>(const map<wstring, float>& SVDConfig, size_t alignedsize);
template /*static*/ void ComputationNetwork::SetDropoutRate<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, unsigned long& dropOutSeed);
template /*static*/ void ComputationNetwork::SetDropoutRate<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase);
template /*static*/ void ComputationNetwork::SetBatchNormalizationTimeConstants<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double normalizationTimeConstant, double& prevNormalizationTimeConstant, double blendTimeConstant, double& prevBlendTimeConstant);
template void ComputationNetwork::SetSeqParam<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, const double& hsmoothingWeight, const double& frameDropThresh, const bool& doreferencealign,
const double& amf, const double& lmf, const double& wp, const double& bMMIfactor, const bool& sMBR);
@ -1451,7 +1454,7 @@ template void ComputationNetwork::InitLearnableParameters<double>(const Computat
template void ComputationNetwork::Read<double>(const wstring& fileName);
template void ComputationNetwork::ReadPersistableParameters<double>(File& fstream, bool create);
template void ComputationNetwork::PerformSVDecomposition<double>(const map<wstring, float>& SVDConfig, size_t alignedsize);
template /*static*/ void ComputationNetwork::SetDropoutRate<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, unsigned long& dropOutSeed);
template /*static*/ void ComputationNetwork::SetDropoutRate<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase);
template /*static*/ void ComputationNetwork::SetBatchNormalizationTimeConstants<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double normalizationTimeConstant, double& prevNormalizationTimeConstant, double blendTimeConstant, double& prevBlendTimeConstant);
template void ComputationNetwork::SetSeqParam<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, const double& hsmoothingWeight, const double& frameDropThresh, const bool& doreferencealign,
const double& amf, const double& lmf, const double& wp, const double& bMMIfactor, const bool& sMBR);

Просмотреть файл

@ -428,7 +428,7 @@ public:
// TODO: Why are all these static, but then take a network as the first argument? --> make them class members
template <class ElemType>
static void SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, unsigned long& dropOutSeed);
static void SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase);
template <class ElemType>
static void SetBatchNormalizationTimeConstants(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode,
@ -478,6 +478,47 @@ public:
return std::vector<ComputationNodeBasePtr>{node};
}
std::vector<ComputationNodeBasePtr> OutputNodesByName(const std::vector<std::wstring>& outputNodeNames)
{
std::vector<ComputationNodeBasePtr> outputNodes;
if (outputNodeNames.size() == 0)
{
if (OutputNodes().size() == 0)
RuntimeError("There is no default output node specified in the network.");
outputNodes = OutputNodes();
}
else
{
for (int i = 0; i < outputNodeNames.size(); i++)
outputNodes.push_back(GetNodeFromName(outputNodeNames[i]));
}
return outputNodes;
}
// Collect all input nodes that outputNodes depend on.
std::vector<ComputationNodeBasePtr> InputNodesForOutputs(const std::vector<std::wstring>& outputNodeNames)
{
// use map to remove duplicated items
auto outputNodes = OutputNodesByName(outputNodeNames);
std::set<ComputationNodeBasePtr> inputNodesMap;
for (auto& onode : outputNodes)
{
for (auto& inode : InputNodes(onode))
inputNodesMap.insert(inode);
}
std::vector<ComputationNodeBasePtr> inputNodes;
for (auto& inode : inputNodesMap)
inputNodes.push_back(inode);
return inputNodes;
}
// these are specified as such by the user
const std::vector<ComputationNodeBasePtr>& FeatureNodes() const { return m_featureNodes ; }
const std::vector<ComputationNodeBasePtr>& LabelNodes() const { return m_labelNodes ; }

Просмотреть файл

@ -105,7 +105,7 @@ ComputationNodeBasePtr ComputationNetwork::GetNestedNetwork(const ComputationNod
ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(const std::vector<shared_ptr<SEQTraversalFlowControlNode>>& recurrentInfo, const std::list<ComputationNodeBasePtr>& allNodes /*must be in eval order*/)
{
// traverse the network in evaluation order and create a new list that replaces all recurrence by a SEQTraversalFlowControlNode
std::set<shared_ptr<IComputationNode>> loopsSeen; // for consistency check only
set<shared_ptr<IComputationNode>> loopsSeen; // for consistency check only
for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end();)
{
shared_ptr<SEQTraversalFlowControlNode> recInfo = FindInRecurrentLoops(recurrentInfo, *nodeIter); // check if this node participates in a recurrent loop
@ -853,18 +853,22 @@ void ComputationNetwork::AllocateAllMatrices(const std::vector<ComputationNodeBa
VerifyIsCompiled("AllocateAllMatrices");
// Due to special topology, if a node is solely induced by parameters, its function value should not be shared
MarkValueNonSharableNodes();
bool performingBackPropagation = (trainRootNode != nullptr);
// Create a composite Eval order with the specified nodes as roots
std::vector<ComputationNodeBasePtr> forwardPropRoots;
forwardPropRoots.insert(forwardPropRoots.end(), evalRootNodes.begin(), evalRootNodes.end());
forwardPropRoots.insert(forwardPropRoots.end(), outValueRootNodes.begin(), outValueRootNodes.end());
if (trainRootNode != nullptr)
forwardPropRoots.push_back(trainRootNode);
// Mark all the eval, output and criterion roots as non-shareable
for (auto& rootNode : forwardPropRoots)
rootNode->MarkValueNonSharable();
// Due to special topology, if a node is solely induced by parameters, its function value should not be shared
MarkValueNonSharableNodes();
bool performingBackPropagation = (trainRootNode != nullptr);
// Create a composite Eval order with the specified nodes as roots
// For each node determine parents and whether the output of the
// node is needed during back propagation
std::unordered_map<ComputationNodeBasePtr, bool> outputValueNeededDuringBackProp;

Просмотреть файл

@ -34,8 +34,11 @@ void ComputationNode<ElemType>::Backprop(const FrameRange& fr, bool childrenInTh
#if 1 // keep enabled once this works
#if 1 // log the cases where this is needed
if (m_needsGradient && !m_gradientInitialized)
//LogicError("%ls %ls operation: Backprop called with uninitialized gradient.", NodeName().c_str(), OperationName().c_str());
fprintf(stderr, "%ls %ls operation: Initializing gradient out of line.\n", NodeName().c_str(), OperationName().c_str());
{
static size_t c = 0;
if (c++ < 100)
fprintf(stderr, "%ls %ls operation: Initializing gradient out of line.\n", NodeName().c_str(), OperationName().c_str());
}
#endif
if (m_needsGradient)
LazyZeroGradient(); // set gradient to 0 if this is the first time
@ -70,6 +73,8 @@ void ComputationNode<ElemType>::Backprop(const FrameRange& fr, bool childrenInTh
// fprintf(stderr, "BackpropTo %d %d %ls %ls\n", (int)fr.timeIdxInSeq, (int)i, NodeName().c_str(), OperationName().c_str());
BackpropTo(i, fr); // this computes partial wrt to the child and sums the gradient value in the child
//child->DebugLogMinibatch(/*gradient*/true);
}
#ifdef DISPLAY_DEBUG
else

Просмотреть файл

@ -230,6 +230,10 @@ public:
{
m_evalTimeStamp = s_timeStampCounter;
}
void SetEvalTimeStampOutdatedWrtAll()
{
m_evalTimeStamp = 0;
}
int64_t GetEvalTimeStamp() const
{
return m_evalTimeStamp;
@ -938,7 +942,7 @@ public:
if (m_value)
{
node->CreateValueMatrixIfNull();
node->m_value->SetValue(*m_value);
node->m_value->SetValue(*m_value);
}
else
node->m_value = nullptr;
@ -1320,7 +1324,7 @@ public:
void UpdateFunctionValuesSize()
{
UpdateDataSize(Value());
Value().CollapseDataLocation(); // actually before writing, should change the name
Value().CollapseDataLocation();
}
// -----------------------------------------------------------------------
@ -1549,6 +1553,7 @@ public:
void Trace()
{
//DebugLogMinibatch();
#if 0
static const std::set<std::wstring> toLog{
L"labelSentenceStartEmbedded",

Просмотреть файл

@ -219,6 +219,7 @@ template class ElementTimesNode<double>;
// If A is minibatch data, then this operation is currently not efficient.
// TODO: Implement this with TensorView::DoElementwiseProductOf() and stride magic
// TODO: Transpose flags for all matrices, inputs and outputs?
// TODO: allow outputRank < 0 meaning to denote "all but", from right
// -----------------------------------------------------------------------
template <class ElemType, bool m_transpose>
@ -232,6 +233,16 @@ public:
{
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
{
Base::CopyTo(nodeP, newName, flags);
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<TimesNodeBase<ElemType, m_transpose>>(nodeP);
node->m_outputRank = m_outputRank;
}
}
void Save(File& fstream) const
{
Base::Save(fstream);
@ -255,6 +266,8 @@ private:
auto input = inputIndex < 0 ? this : Input(inputIndex).get();
auto data = gradient ? input->GradientPtr() : input->ValuePtr();
size_t rank = input->GetSampleLayout().GetRank();
if (inputIndex == 0 && m_transpose && rank == 1) // transposing a 1D tensor implies it is really a 2D tensor. Note that m_transpose applies to left operand only.
rank = 2;
if (!Input(0)->HasMBLayout()) // left input is no MB data: run normally
return input->DataTensorFor(data, rank, fr);
auto tensorShape = input->GetOneSampleTensorSliceFor(rank, fr);
@ -309,17 +322,18 @@ public:
{
// currently we only support one combination when the input is sparse
// If input data is sparse, then gradient is block sparse.
// BUGBUG: This does not accumulate into the Input(0)->Gradient, which might cause problems elsewhere.
if (Input(1)->Value().GetMatrixType() == SPARSE && Input(0)->Gradient().GetMatrixType() == DENSE && Gradient().GetMatrixType() == DENSE)
Input(0)->Gradient().SwitchToMatrixType(SPARSE, MatrixFormat::matrixFormatSparseBlockCol, false);
auto input0Gradient = OneSampleTensorFor(0, /*gradient=*/true, fr.AllowBroadcast());
auto input0Gradient = OneSampleTensorFor(0, /*gradient=*/true, fr.AllowBroadcast());
auto input1 = OneSampleTensorFor(1, /*gradient=*/false, fr.AllowBroadcast());
auto outputGradient = OneSampleTensorFor(-1, /*gradient=*/true, fr);
input0Gradient.AddMatrixProductOf(m_transpose/*transC*/, outputGradient, false/*transA*/, input1, true/*transB*/);
}
else if (inputIndex == 1) // right derivative
{
auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
auto input1Gradient = OneSampleTensorFor(1, /*gradient=*/true, fr.AllowBroadcast());
auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
auto input1Gradient = OneSampleTensorFor(1, /*gradient=*/true, fr.AllowBroadcast());
auto outputGradient = OneSampleTensorFor(-1, /*gradient=*/true, fr);
input1Gradient.AddMatrixProductOf(false/*transC*/, input0, !m_transpose/*transA*/, outputGradient, false/*transB*/);
}
@ -422,9 +436,6 @@ public:
std::swap(dimsA[0], dimsA[1]);
// update if LearnableParameter
Input(0)->ValidateInferInputDimsFrom(TensorShape(dimsA));
// and verify once again
if (isFinalValidationPass && Input(0)->GetSampleLayout().GetDims() != dimsA)
InvalidArgument("%ls %ls operation: Left [%s] and right [%s] operands' shapes are not compatible.", NodeName().c_str(), OperationName().c_str(), dimsAstring.c_str(), dimsBstring.c_str());
}
}
@ -490,7 +501,7 @@ template class TimesNode<double>;
// This differs from TimesNode in that A is transposed, where A must be a
// rank-1 or rank-2 tensor.
// A common use of transposition is trace(X'X) where X is a matrix of samples.
// This can be more efficiently implemented as ReducePlus (ElementTimes (X, X))
// This can be more efficiently implemented as ReduceSum (ElementTimes (X, X))
// -----------------------------------------------------------------------
template <class ElemType>
@ -564,7 +575,7 @@ public:
Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
sliceOutputValue.SetValue(sliceInput1Value);
sliceOutputValue.AssignValuesOf(sliceInput1Value);
sliceOutputValue.ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
}

Просмотреть файл

@ -17,9 +17,12 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// MatrixPool -- class to support memory sharing
// Despite the gather general name of this class, it is specifically designed to support the memory sharing of ComputationNodes.
// Note: see #define SUPRESS_MEMSHARING below as for how to temporarily disable memory sharing altogether, for debugging
class MatrixPool
{
vector<shared_ptr<Matrix<float>>> m_releasedFloatMatrices;
vector<shared_ptr<Matrix<float>>> m_releasedFloatMatrices;
vector<shared_ptr<Matrix<double>>> m_releasedDoubleMatrices;
template <class ElemType>
@ -30,9 +33,12 @@ public:
template <class ElemType>
void Release(shared_ptr<Matrix<ElemType>> freeMatrix)
{
vector<shared_ptr<Matrix<ElemType>>>& releasedMatrices = GetReleasedMatrices<ElemType>();
if (freeMatrix == nullptr || freeMatrix->GetMatrixType() == SPARSE)
RuntimeError("MatrixPool::Release: freeMatrix should not be null or sparse.");
LogicError("MatrixPool::Release: freeMatrix should not be null or sparse.");
//#define SUPRESS_MEMSHARING // #define this to disable memory sharing through this structure
// TODO: Make this a runtime option.
#ifndef SUPRESS_MEMSHARING
vector<shared_ptr<Matrix<ElemType>>>& releasedMatrices = GetReleasedMatrices<ElemType>();
#ifdef _DEBUG
for (int i = 0; i < releasedMatrices.size(); i++)
{
@ -42,6 +48,7 @@ public:
#endif
releasedMatrices.push_back(freeMatrix);
#endif
}
template <class ElemType>
@ -65,4 +72,5 @@ public:
return matrixPtr;
}
};
} } }
}}}

Просмотреть файл

@ -448,8 +448,18 @@ public:
{
}
virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override { return childIndex == 0; }
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual void /*IComputationNode::*/ BeginForwardProp() override // called before first iteration step of ForwardProp()
{
Base::BeginForwardProp();
// we switch result to dense as a work-around because ColumnSlice doesn't support all the sparse formats
// TODO: This is a stopgap. Is this the right thing to do? It changes the matrix type in-place.
Value().SwitchToMatrixType(MatrixType::DENSE, MatrixFormat::matrixFormatDense, false);
}
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
{
ValidateNaryZip(isFinalValidationPass, /* allow broadcast */ true, /* num Inputs */ 3);
}
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
{
@ -485,10 +495,8 @@ public:
}
}
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
{
ValidateNaryZip(isFinalValidationPass, /* allow broadcast */ true, /* num Inputs */ 3);
}
virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override { return childIndex == 0; }
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
};
template class IfNode<float>;

Просмотреть файл

@ -321,7 +321,7 @@ public:
inp = Input(0)->ValueFor(frDelayed.Sequence(id));
// inp = Input(0)->ValueFor(FrameRange(m_pMBLayout, t_delayed).Sequence(id));
out.SetValue(inp);
out.AssignValuesOf(inp);
}
}
}
@ -358,7 +358,7 @@ public:
inp = Input(0)->ValueFor(frDelayed);
// inp = Input(0)->ValueFor(FrameRange(m_pMBLayout, t_delayed));
out.SetValue(inp);
out.AssignValuesOf(inp);
}
}

Просмотреть файл

@ -31,10 +31,13 @@ template <class ElemType>
/*virtual*/ void ReduceElementsNode<ElemType>::CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const /*override*/
{
Base::CopyTo(nodeP, newName, flags);
auto node = dynamic_pointer_cast<ReduceElementsNode<ElemType>>(nodeP);
node->m_axis = m_axis;
node->m_operation = m_operation;
node->m_op = m_op;
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<ReduceElementsNode<ElemType>>(nodeP);
node->m_axis = m_axis;
node->m_operation = m_operation;
node->m_op = m_op;
}
}
template <class ElemType>
@ -60,7 +63,7 @@ template <class ElemType>
auto result = ValueTensorFor(rank, fr);
auto input = Input(0)->ValueTensorFor(rank, fr);
// the actual operation is a Copy with a reduction op
// the actual operation is a Copy with reduction, where the magic is in the reduction op
result.DoUnaryOpOf(0, input, 1, ElementWiseOperator::opCopy, m_op);
// note: we can implement "Mean" by passing 1/dim for alpha
}
@ -79,7 +82,7 @@ template <class ElemType>
switch (m_op)
{
case ElementWiseOperator::opSum:
// "Plus": broadcast the gradient
// "Sum": broadcast the gradient
sliceInputGrad.AddCopyOf(sliceOutputGrad);
break;
@ -121,9 +124,13 @@ template <class ElemType>
template <class ElemType>
void ReduceElementsNode<ElemType>::ValidateOp()
{
#if 1 // legacy with initial experiments, delete this soon
if (m_operation == L"Plus") m_op = ElementWiseOperator::opSum;
else
#endif
if (m_operation == L"Sum") m_op = ElementWiseOperator::opSum;
// more here
else InvalidArgument("%ls was given an invalid operation code '%ls'. Allowed are: 'Plus'. And a few more soon.", NodeDescription().c_str(), m_operation.c_str());
else InvalidArgument("%ls was given an invalid operation code '%ls'. Allowed are: 'Sum'. And a few more soon.", NodeDescription().c_str(), m_operation.c_str());
}
template <class ElemType>

Просмотреть файл

@ -150,12 +150,12 @@ public:
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
{
ValueFor(fr).SetValue(Input(0)->ValueFor(fr));
ValueFor(fr).AssignValuesOf(Input(0)->ValueFor(fr));
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
Input(inputIndex)->GradientFor(fr).SetValue(GradientFor(fr));
Input(inputIndex)->GradientFor(fr).AssignValuesOf(GradientFor(fr));
}
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
@ -175,13 +175,16 @@ template class ReshapeNode<double>;
// Reduces (e.g. sums up) all elements in each sample (column) of the input.
// The optional axis can be 0 (meaning all elements) or a specific axis.
// Allowed operations:
// - "Plus"
// - "LogPlus" --not implemented yet
// - "Sum"
// - "LogSum" --not implemented yet
// - "Mean" --not implemented yet
// - "Max" --not implemented yet
// - "Min" --not implemented yet
// - "All" --not implemented yet
// - "Any" --not implemented yet
// TODO:
// - move to a different header, since it's not really Reshaping
// - consider to change to pass in a set of axes instead of only one
// -----------------------------------------------------------------------
template <class ElemType>
@ -216,7 +219,7 @@ public:
private:
int m_axis;
std::wstring m_operation; // the operation as a string, e.g. "Plus", see GetOpcode()
std::wstring m_operation; // the operation as a string, e.g. "Sum", see ValidateOp()
ElementWiseOperator m_op; // the operation mapped to our internal opCode
};
@ -252,7 +255,7 @@ public:
Input(1)->NodeName().c_str(), Input(1)->OperationName().c_str());
// copy the data from 'dataInput'
ValueFor(fr).SetValue(Input(0)->ValueFor(fr.WithLayout(Input(0)->GetMBLayout()))); // just propagate through
ValueFor(fr).AssignValuesOf(Input(0)->ValueFor(fr.WithLayout(Input(0)->GetMBLayout()))); // just propagate through
// TODO: Once we do in-place, the above must include a copy-to-self check (either here or inside the matrix lib).
}
@ -653,6 +656,7 @@ public:
WhereNode(DEVICEID_TYPE deviceId, const wstring& name) :
Base(deviceId, name)
{
MarkValueNonSharable();
}
virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override;
@ -693,6 +697,7 @@ public:
PackedIndexNode(DEVICEID_TYPE deviceId, const wstring& name) :
Base(deviceId, name)
{
MarkValueNonSharable();
}
virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override;
@ -1077,7 +1082,7 @@ public:
// (We still need to copy the values since there is currently no way to point to an input function value while reshaping at the same time.)
if (!m_pMBLayout || factor() == 1)
{
Value().Reshaped(newCols * m_numTargetRows, 1).SetValue(Input(0)->Value().Reshaped(cols * rows, 1)); // copy the values as one long vector
Value().Reshaped(newCols * m_numTargetRows, 1).AssignValuesOf(Input(0)->Value().Reshaped(cols * rows, 1)); // copy the values as one long vector
}
// layout case: reshape semantics happens across parallel seqeunces, i.e. requiring data shuffling
else
@ -1371,7 +1376,7 @@ reductions
----------
- these are/will be implemented as a node for samples, and as recurrences for sequences
- ReducePlus
- ReduceSum
- sum over all elements of a dimension, or over time
- ReduceMax, ReduceMin
- max

Просмотреть файл

@ -7,6 +7,7 @@
#include "Basics.h"
#include "ComputationNode.h"
#include "BatchNormalizationEngine.h"
#include "RNGHandle.h"
#include <map>
#include <string>
@ -177,6 +178,7 @@ public:
// first compute the softmax (column-wise)
// Note that we need both log and non-log for gradient computation.
m_logSoftmaxOfRight->AssignLogSoftmaxOf(Input(1)->ValueFor(fr), true);
// BUGBUG: No need to compute m_softmaxOfRight in ForwardProp, should be moved to BackpropTo().
m_softmaxOfRight->SetValue(*m_logSoftmaxOfRight);
m_softmaxOfRight->InplaceExp();
// flatten all gaps to zero, such that gaps will contribute zero to the sum
@ -780,7 +782,7 @@ private:
case 3:
{
Matrix<ElemType> grd_t = Input(CLASSPROBINDATA)->GradientFor(fr);
grd_t.SetValue(Input(CLASSPROBINDATA)->DataFor(m_clsSoftmax, fr));
grd_t.AssignValuesOf(Input(CLASSPROBINDATA)->DataFor(m_clsSoftmax, fr));
ComputeCEPartialToSoftmaxInputs(grd_t, Gradient(), c_t);
break;
}
@ -811,7 +813,7 @@ private:
size_t idx_in_class = y_t - lft_bnd;
ComputeCEPartialToSoftmaxInputs(softMax, Gradient(), idx_in_class);
m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd).SetValue(softMax);
m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd).AssignValuesOf(softMax);
});
m_needRecomputeGradientToSoftmaxInput = false;
@ -1459,8 +1461,7 @@ public:
{
// determine drop-out mask for this minibatch
auto sliceMask = DataFor(*m_maskOfDropout, fr);
sliceMask.SetUniformRandomMask((ElemType) m_dropoutRate, (ElemType)(1.0 / (1.0 - m_dropoutRate)) /*pre-scaled*/, m_randomSeed);
m_randomSeed += 1073807359; // 1073807359 is a very large prime number to avoid collision with other dropout nodes
sliceMask.SetUniformRandomMask((ElemType)m_dropoutRate, (ElemType)(1.0 / (1.0 - m_dropoutRate)) /*pre-scaled*/, GetRNGHandle());
// apply dropout mask
sliceOutputValue.AssignElementProductOf(sliceMask, sliceInput0Value);
}
@ -1482,6 +1483,18 @@ public:
void SetRandomSeed(const unsigned long val)
{
m_randomSeed = (unsigned long) val;
// Upon change of the seed, reset RNGHandle to force the creation of a new RNGHandle
// during forward propagation
m_RNGHandle = nullptr;
}
RNGHandle& GetRNGHandle()
{
if (m_RNGHandle == nullptr)
m_RNGHandle = RNGHandle::Create(ValuePtr()->GetDeviceId(), m_randomSeed);
return *m_RNGHandle;
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1512,6 +1525,7 @@ public:
private:
double m_dropoutRate;
unsigned long m_randomSeed;
std::shared_ptr<RNGHandle> m_RNGHandle;
shared_ptr<Matrix<ElemType>> m_maskOfDropout;
};
@ -1765,10 +1779,10 @@ public:
}
m_bnEng->Forward(sliceInputValue, scale, bias, expAvgFactor, blendFactor, runMean, runInvStdDev,
sliceOutputValue, m_epsilon, *m_saveMean, *m_saveInvStdDev);
sliceOutputValue, m_epsilon, *m_saveMean, *m_saveInvStdDev);
m_mbCount++;
}
m_mbCount++;
}
void Validate(bool isFinalValidationPass) override
{

Просмотреть файл

@ -18,6 +18,11 @@
#endif
#include "BestGpu.h"
#include "MPIWrapper.h"
#include "DataDeserializer.h"
#include "SequencePacker.h"
#include "NoRandomizer.h"
#include "HeapMemoryProvider.h"
#include "InputAndParamNodes.h"
// TODO: Temporary mechanism to enable memory sharing for
// node output value matrices. This will go away when the
@ -26,7 +31,50 @@ bool g_shareNodeValueMatrices = false;
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
template <typename ElemType>
void CNTKEvalBase<ElemType>::Init(const std::string& config)
{
m_config.Parse(config);
size_t nThreads = m_config("numCPUThreads", "1");
CPUMatrix<ElemType>::SetNumThreads(nThreads);
g_shareNodeValueMatrices = m_config(L"shareNodeValueMatrices", false);
}
// CreateNetwork - create a network based on the network description
// networkDescription - network description
template <typename ElemType>
void CNTKEvalBase<ElemType>::CreateNetwork(const std::string& networkDescription)
{
ConfigParameters config;
config.Parse(networkDescription);
std::vector<wstring> outputNodeNames;
m_net = GetModelFromConfig<ConfigParameters, ElemType>(config, outputNodeNames);
if (m_net == nullptr)
{
LogicError("Unable to construct network from description");
}
}
// Destroy - cleanup and remove this class
// NOTE: this destroys the object, and it can't be used past this point
template <typename ElemType>
void CNTKEvalBase<ElemType>::Destroy()
{
// cleanup everything
m_net.reset();
}
// ----------------------------------------------------------------------------
// Basic interface
// ----------------------------------------------------------------------------
template <typename ElemType>
void EVAL_API GetEval(IEvaluateModel<ElemType>** peval)
{
*peval = new CNTKEval<ElemType>();
@ -41,51 +89,11 @@ extern "C" EVAL_API void GetEvalD(IEvaluateModel<double>** peval)
GetEval(peval);
}
template <class ElemType>
void CNTKEval<ElemType>::Init(const std::string& config)
{
m_start = 0;
m_config.Parse(config);
size_t nThreads = m_config("numCPUThreads", "1");
CPUMatrix<ElemType>::SetNumThreads(nThreads);
g_shareNodeValueMatrices = m_config(L"shareNodeValueMatrices", false);
}
// Destroy - cleanup and remove this class
// NOTE: this destroys the object, and it can't be used past this point
template <class ElemType>
void CNTKEval<ElemType>::Destroy()
{
// cleanup everything
m_net.reset();
delete m_reader;
delete m_writer;
delete this;
}
// CreateNetwork - create a network based on the network description
// networkDescription - network description
template <class ElemType>
void CNTKEval<ElemType>::CreateNetwork(const std::string& networkDescription)
{
ConfigParameters config;
config.Parse(networkDescription);
std::vector<wstring> outputNodeNames;
m_net = GetModelFromConfig<ConfigParameters, ElemType>(config, outputNodeNames);
if (m_net == nullptr)
{
LogicError("Unable to construct network from description");
}
}
// GetNodeDimensions - Get the node dimensions of the specified nodes
// dimensions - map from name of node to dimension of the node, will be appended to for Input/Output scenarios
// nodeGroup - type of node we are requesting (input/output/specified)
// NOTE: when nodeGroup==specified the dimensions map is expected to be populated with the string names of the nodes requested, dimensions will be modified return the current value.
template <class ElemType>
template <typename ElemType>
void CNTKEval<ElemType>::GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup)
{
if (m_net == NULL)
@ -137,7 +145,7 @@ void CNTKEval<ElemType>::GetNodeDimensions(std::map<std::wstring, size_t>& dimen
// StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls.
// outputNodeName - name of node that will be evaluated
template <class ElemType>
template <typename ElemType>
void CNTKEval<ElemType>::StartEvaluateMinibatchLoop(const std::wstring& outputNodeName)
{
m_net->StartEvaluateMinibatchLoop(m_net->GetNodeFromName(outputNodeName));
@ -146,7 +154,7 @@ void CNTKEval<ElemType>::StartEvaluateMinibatchLoop(const std::wstring& outputNo
// Evaluate - evaluate using the model with the given inputs and outputs
// inputs - map from node name to input vector
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
template <class ElemType>
template <typename ElemType>
void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs)
{
size_t minibatchSize = m_config(L"minibatchSize", (size_t) 10240);
@ -183,7 +191,7 @@ void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>
// Evaluate - evaluate using the model with the given inputs and outputs
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
template <class ElemType>
template <typename ElemType>
void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs)
{
// get the evaluation names from the output string
@ -206,14 +214,168 @@ void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>
eval.WriteOutput(*m_writer, outNodeNames);
}
// ResetState - Reset the cell state when we get start of an utterance
template <class ElemType>
void CNTKEval<ElemType>::ResetState()
template <typename ElemType>
void CNTKEval<ElemType>::Destroy()
{
m_start = 1 - m_start;
CNTKEvalBase<ElemType>::Destroy();
delete m_reader;
delete m_writer;
delete this;
}
// instantiate all the combinations we expect to be used
template class CNTKEval<double>;
template class CNTKEval<float>;
// ----------------------------------------------------------------------------
// Extended interface
// ----------------------------------------------------------------------------
// Build a VariableLayout descriptor (name, element type, storage kind, sample
// dimension, dynamic-axis name) for a single computation node.
// n - the node to describe; its Value matrix determines dense vs. sparse storage.
// NOTE(review): n->GetMBLayout() is dereferenced unconditionally here, while
// ForwardPass() tolerates a null MB layout -- confirm every node passed in
// carries a layout.
template<typename ElemType>
VariableLayout CNTKEvalExtended<ElemType>::ToVariableLayout(const ComputationNodeBasePtr n)
{
    // The cast fails (yields null) when the node's value matrix is not of ElemType,
    // in which case the storage kind is reported as Undetermined.
    auto matrix = dynamic_pointer_cast<Matrix<ElemType>>(n->ValuePtr());
    return VariableLayout
    {
        /* name */ n->GetName(),
        /* type */ sizeof(ElemType) == sizeof(float) ? VariableLayout::Float32 : VariableLayout::Float64,
        /* storage */ matrix ? matrix->GetMatrixType() == MatrixType::DENSE ? VariableLayout::Dense :
                               matrix->GetMatrixType() == MatrixType::SPARSE ? VariableLayout::Sparse :
                               VariableLayout::Undetermined :
                               VariableLayout::Undetermined,
        /* dimension */ n->GetSampleLayout().GetNumElements(),
        /* dynamic axis */ wstring(n->GetMBLayout()->GetAxisName())
    };
}
// Prepare the network for ForwardPass() calls on the given outputs: resolves
// the output node set and the input nodes they depend on, allocates the
// forward-pass matrices, and caches the input matrix bindings.
// outputNodeNames - names of the nodes whose values will be requested.
template<typename ElemType>
void CNTKEvalExtended<ElemType>::StartForwardEvaluation(std::vector<wstring> outputNodeNames)
{
    // Keep the network in inference mode for the lifetime of this scoped object.
    m_scopedNetworkOperationMode = make_shared<ScopedNetworkOperationMode>(m_net, NetworkOperationMode::inferring);

    // Resolve the requested outputs and the inputs feeding them.
    m_outputNodes = m_net->OutputNodesByName(outputNodeNames);
    m_inputNodes = m_net->InputNodesForOutputs(outputNodeNames);

    // Allocate memory for the forward computation.
    m_net->AllocateAllMatrices({}, m_outputNodes, nullptr);
    m_net->StartEvaluateMinibatchLoop(m_outputNodes);
    m_inputMatrices = DataReaderHelpers::RetrieveInputMatrices(m_inputNodes);
}
// Returns a layout description (name/type/storage/dimension/axis) for every
// output node of the loaded network.
template<typename ElemType>
VariableSchema CNTKEvalExtended<ElemType>::GetOutputSchema() const
{
    VariableSchema outputLayouts;
    const auto& outputNodes = m_net->OutputNodes();
    for (const auto& node : outputNodes)
        outputLayouts.push_back(ToVariableLayout(node));
    return outputLayouts;
}
// Returns a layout description for every input node feeding the requested
// outputs. Falls back to all of the network's input nodes when
// StartForwardEvaluation() has not populated m_inputNodes yet.
template<typename ElemType>
VariableSchema CNTKEvalExtended<ElemType>::GetInputSchema() const
{
    auto nodes = m_inputNodes;
    if (nodes.empty())
    {
        // Default to all input nodes.
        nodes = m_net->InputNodesForOutputs({});
    }

    VariableSchema inputLayouts;
    for (const auto& node : nodes)
        inputLayouts.push_back(ToVariableLayout(node));
    return inputLayouts;
}
// Run one forward pass: bind the caller-provided input buffers to the input
// nodes, evaluate each requested output node, and copy its value matrix into
// the corresponding output buffer.
// inputs - one VariableBuffer per input node, in m_inputMatrices iteration order.
// output - preallocated buffers, one per entry of m_outputNodes, in that order.
template<typename ElemType>
void CNTKEvalExtended<ElemType>::ForwardPass(const Variables<ElemType>& inputs, Variables<ElemType>& output)
{
    // The caller must supply exactly one buffer per bound input node.
    if (inputs.size() != (size_t)std::distance(m_inputMatrices.begin(), m_inputMatrices.end()))
    {
        RuntimeError("Expected %d inputs, but got %d", (int)std::distance(m_inputMatrices.begin(), m_inputMatrices.end()), (int)inputs.size());
    }

    int i = 0;
    for (auto& input : m_inputMatrices)
    {
        // NOTE(review): this copies the caller's buffer; a const reference would avoid it -- confirm intended.
        VariableBuffer<ElemType> buffer = inputs[i];
        int numRows = input.second.sampleLayout.GetNumElements();
        int numCols = buffer.m_numberOfSamples;
        shared_ptr<Matrix<ElemType>> matrix = dynamic_pointer_cast<Matrix<ElemType>>(input.second.matrix);
        auto type = matrix->GetMatrixType();
        // Describe the minibatch as a single parallel sequence spanning all columns.
        input.second.pMBLayout->Init(1, numCols);
        input.second.pMBLayout->AddSequence(0, 0, 0, numCols);

        if (type == MatrixType::DENSE)
        {
            matrix->SetValue(numRows, numCols, matrix->GetDeviceId(), buffer.m_buffer.data(), matrixFlagNormal);
        }
        else if (type == MatrixType::SPARSE)
        {
            // In the sparse case the m_data layout is identical to CUDA's CSC layout
            // (see http://docs.nvidia.com/cuda/cusparse/#compressed-sparse-column-format-csc).
            matrix->SetMatrixFromCSCFormat(buffer.m_colIndices.data(), buffer.m_indices.data(), buffer.m_buffer.data(), buffer.m_buffer.size(), numRows, numCols);
        }

        ++i;
    }

    // Mark the inputs as updated so the forward pass recomputes their dependents.
    ComputationNetwork::BumpEvalTimeStamp(m_inputNodes);

    // NOTE(review): this loop's 'i' shadows the input index declared above.
    for (int i = 0; i < m_outputNodes.size(); ++i)
    {
        auto node = m_outputNodes[i];
        m_net->ForwardProp(node);
        shared_ptr<Matrix<ElemType>> outputMatrix = dynamic_pointer_cast<Matrix<ElemType>>(node->ValuePtr());
        auto pMBLayout = node->GetMBLayout();
        if (!pMBLayout)
        {
            // Nodes without a layout are wrapped in a synthetic one-sample frame layout.
            pMBLayout = make_shared<MBLayout>();
            pMBLayout->InitAsFrameMode(1); // treat this as if we have one single sample
        }

        const auto& seq = pMBLayout->GetAllSequences();
        if (seq.size() != 1)
        {
            RuntimeError("Only 1 sequence supported by this API"); // TODO
        }

        // Copy the full output matrix into the caller's (resized) buffer.
        std::vector<ElemType>& vec = output[i].m_buffer;
        vec.resize(outputMatrix->GetNumElements());
        // const_cast is redundant here (vec is non-const), kept as-is.
        ElemType* data = const_cast<ElemType*>(vec.data());
        size_t numElements = outputMatrix->GetNumElements();
        outputMatrix->CopyToArray(data, numElements);
    }
}
// Destroy - tear down base-class state and delete this instance.
// NOTE: this destroys the object; it must not be used past this point.
template <typename ElemType>
void CNTKEvalExtended<ElemType>::Destroy()
{
    CNTKEvalBase<ElemType>::Destroy();
    delete this;
}
// Factory: create a new extended-interface evaluator of the requested element type.
template <typename ElemType>
void EVAL_API GetEvalExtended(IEvaluateModelExtended<ElemType>** peval)
{
    *peval = new CNTKEvalExtended<ElemType>();
}

// C-callable factory entry points (float / double) for consumers that load
// the evaluation DLL dynamically.
extern "C" EVAL_API void GetEvalExtendedF(IEvaluateModelExtended<float>** peval)
{
    GetEvalExtended(peval);
}

extern "C" EVAL_API void GetEvalExtendedD(IEvaluateModelExtended<double>** peval)
{
    GetEvalExtended(peval);
}
template class CNTKEvalExtended<double>;
template class CNTKEvalExtended<float>;
} } }

Просмотреть файл

@ -22,48 +22,97 @@
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
class CNTKEval : public IEvaluateModel<ElemType>
template <typename ElemType>
class CNTKEvalBase : public IEvaluateModelBase<ElemType>
{
protected:
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
EvalReader<ElemType>* m_reader;
EvalWriter<ElemType>* m_writer;
ConfigParameters m_config;
ComputationNetworkPtr m_net;
std::map<std::wstring, size_t> m_dimensions;
size_t m_start;
public:
// constructor
CNTKEval()
: m_reader(nullptr), m_net(nullptr)
{
}
CNTKEvalBase() : m_net(nullptr) { }
public:
// CreateNetwork - create a network based on the network description
// networkDescription - network description
virtual void CreateNetwork(const std::string& networkDescription);
// GetNodeDimensions - Get the node dimensions of the specified nodes
// dimensions - map from name of node to dimension of the node
// nodeGroup - type of node we are requesting (input/output/specified)
virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup);
// StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls.
// outputNodeName - name of node that will be evaluated
virtual void StartEvaluateMinibatchLoop(const std::wstring& outputNodeName);
// Evaluate - Evalute using the model with the given inputs and outputs
// inputs - map from node name to input vector
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs);
// Evaluate - Evalute using the model with the given inputs and outputs
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs);
virtual void Init(const std::string& config);
virtual void Destroy();
virtual void ResetState();
};
// ------------------------------------------------------------------------
// Basic interface
// ------------------------------------------------------------------------
// Implementation of the basic (legacy) evaluation interface: Evaluate() calls
// over maps of named node vectors, driven by an internal reader/writer pair.
template <typename ElemType>
class CNTKEval : public CNTKEvalBase<ElemType>, public IEvaluateModel<ElemType>
{
    EvalReader<ElemType>* m_reader; // feeds caller-provided input vectors into the network
    EvalWriter<ElemType>* m_writer; // copies network outputs back into caller vectors
    std::map<std::wstring, size_t> m_dimensions;
    size_t m_start; // utterance-start flag, toggled by ResetState()

public:
    CNTKEval() : CNTKEvalBase<ElemType>(), m_reader(nullptr), m_writer(nullptr) {}

    // GetNodeDimensions - get the dimensions of the nodes in the given group.
    virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup);

    // StartEvaluateMinibatchLoop - prepare the network for Evaluate() calls on the named output node.
    virtual void StartEvaluateMinibatchLoop(const std::wstring& outputNodeName);

    // Evaluate - run the model over the given inputs; output vectors are sized during evaluation.
    virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs);

    // Evaluate - run the model with no external inputs, filling the output vectors.
    virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& outputs);

    virtual void Destroy() override;

    virtual void CreateNetwork(const std::string& networkDescription) override
    {
        CNTKEvalBase<ElemType>::CreateNetwork(networkDescription);
    }

    virtual void Init(const std::string& config) override
    {
        CNTKEvalBase<ElemType>::Init(config);
        m_start = 0;
    }

    // ResetState - signal the start of a new utterance by toggling m_start.
    virtual void ResetState() override
    {
        m_start = 1 - m_start;
    }
};
// ------------------------------------------------------------------------
// Extended interface
// ------------------------------------------------------------------------
// Implementation of the extended evaluation interface: schema discovery plus
// ForwardPass() over typed variable buffers (dense or sparse CSC input).
template <typename ElemType>
class CNTKEvalExtended : public CNTKEvalBase<ElemType>, public IEvaluateModelExtended<ElemType>
{
    virtual VariableSchema GetOutputSchema() const override;

    virtual void StartForwardEvaluation(std::vector<wstring> outputs) override;

    virtual VariableSchema GetInputSchema() const override;

    virtual void ForwardPass(const Variables<ElemType>& inputs, Variables<ElemType>& output) override;

    virtual void Destroy() override;

    virtual void CreateNetwork(const std::string& networkDescription) override
    {
        CNTKEvalBase<ElemType>::CreateNetwork(networkDescription);
    }

    virtual void Init(const std::string& config) override
    {
        CNTKEvalBase<ElemType>::Init(config);
    }

private:
    // Translate a computation node into its externally visible layout description.
    static VariableLayout ToVariableLayout(const ComputationNodeBasePtr n);

    std::vector<ComputationNodeBasePtr> m_outputNodes;                        // outputs resolved by StartForwardEvaluation()
    std::shared_ptr<ScopedNetworkOperationMode> m_scopedNetworkOperationMode; // keeps the network in inference mode while alive
    std::vector<ComputationNodeBasePtr> m_inputNodes;                         // inputs feeding the requested outputs
    StreamMinibatchInputs m_inputMatrices;                                    // input-node matrix bindings
};
} } }

Просмотреть файл

@ -55,8 +55,8 @@
<TargetName>EvalDll</TargetName>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(SolutionDir)Source\ActionsLib;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Readers\ReaderLib;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(SolutionDir)Source\ActionsLib;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\Math;$(MSMPI_LIB64);$(SolutionDir)$(Platform)\$(Configuration);$(NvmlLibPath)</AdditionalLibraryDirectories>
@ -99,7 +99,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; Common.lib; ActionsLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; Common.lib; ActionsLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib;ReaderLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<DelayLoadDLLs>Math.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
</Link>
@ -153,4 +153,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -2,39 +2,18 @@
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="CNTKEval.cpp" />
<ClCompile Include="..\Common\fileutil.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\Common\File.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\Common\TimerUtility.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="dllmain.cpp">
<Filter>Misc</Filter>
</ClCompile>
<ClCompile Include="stdafx.cpp">
<Filter>Misc</Filter>
</ClCompile>
<ClCompile Include="..\Common\Config.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\Common\Eval.cpp">
<Filter>For External Use</Filter>
</ClCompile>
<ClCompile Include="..\Common\ExceptionWithCallStack.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\CNTK\BrainScript\BrainScriptEvaluator.cpp">
<Filter>BrainScript</Filter>
</ClCompile>
<ClCompile Include="..\CNTK\BrainScript\BrainScriptParser.cpp">
<Filter>BrainScript</Filter>
</ClCompile>
<ClCompile Include="..\Common\DataReader.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="EvalReader.h" />

Просмотреть файл

@ -150,7 +150,7 @@ public:
return true;
}
size_t GetNumParallelSequences()
size_t GetNumParallelSequencesForFixingBPTTMode()
{
return 1;
}

Просмотреть файл

@ -0,0 +1,99 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// CPPEvalClient.cpp : Sample application using the evaluation interface from C++
//
#include "stdafx.h"

#include <stdexcept>

#include "eval.h"
using namespace Microsoft::MSR::CNTK;
// Used for retrieving the model appropriate for the element type (float / double)
template<typename ElemType>
using GetEvalProc = void(*)(IEvaluateModel<ElemType>**);
typedef std::pair<std::wstring, std::vector<float>*> MapEntry;
typedef std::map<std::wstring, std::vector<float>*> Layer;
/// <summary>
/// Program for demonstrating how to run model evaluations using the native evaluation interface
/// </summary>
/// <description>
/// This program is a native C++ client using the native evaluation interface
/// located in the <see cref="eval.h"/> file.
/// The CNTK evaluation dll (EvalDLL.dll), must be found through the system's path.
/// The other requirement is that Eval.h be included
/// In order to run this program the model must already exist in the example. To create the model,
/// first run the example in <CNTK>/Examples/Image/MNIST. Once the model file 01_OneHidden is created,
/// you can run this client.
/// This program demonstrates the usage of the Evaluate method requiring the input and output layers as parameters.
int _tmain(int argc, _TCHAR* argv[])
{
    // Get the binary path (current working directory)
    argc = 0; // unused; assignment silences the unreferenced-parameter warning
    std::wstring wapp(argv[0]);
    std::string app(wapp.begin(), wapp.end());
    std::string path = app.substr(0, app.rfind("\\"));

    // Load the eval library
    auto hModule = LoadLibrary(L"evaldll.dll");
    if (hModule == nullptr)
    {
        // Throw by value so the standard catch-by-reference idiom works.
        // (The previous `throw new std::exception(...)` threw a heap pointer,
        // which bypasses `catch (const std::exception&)` and leaks.)
        throw std::runtime_error("Cannot find evaldll.dll library");
    }

    // Get the factory method to the evaluation engine
    std::string func = "GetEvalF";
    auto procAddress = GetProcAddress(hModule, func.c_str());
    auto getEvalProc = (GetEvalProc<float>)procAddress;

    // Native model evaluation instance
    IEvaluateModel<float> *model;
    getEvalProc(&model);

    // This relative path assumes launching from CNTK's binary folder
    const std::string modelWorkingDirectory = path + "\\..\\..\\Examples\\Image\\MNIST\\Data\\";
    const std::string modelFilePath = modelWorkingDirectory + "..\\Output\\Models\\01_OneHidden";

    // Load model
    model->CreateNetwork("modelPath=\"" + modelFilePath + "\"");

    // get the model's layers dimensions
    std::map<std::wstring, size_t> inDims;
    std::map<std::wstring, size_t> outDims;
    model->GetNodeDimensions(inDims, NodeGroup::nodeInput);
    model->GetNodeDimensions(outDims, NodeGroup::nodeOutput);

    // Generate dummy input values in the appropriate structure and size.
    // size_t index: the dimension is a size_t, so this avoids a signed/unsigned mismatch.
    auto inputLayerName = inDims.begin()->first;
    std::vector<float> inputs;
    for (size_t i = 0; i < inDims[inputLayerName]; i++)
    {
        inputs.push_back(static_cast<float>(i % 255));
    }

    // Allocate the output values layer
    std::vector<float> outputs;

    // Setup the maps for inputs and output
    Layer inputLayer;
    inputLayer.insert(MapEntry(inputLayerName, &inputs));
    Layer outputLayer;
    auto outputLayerName = outDims.begin()->first;
    outputLayer.insert(MapEntry(outputLayerName, &outputs));

    // We can call the evaluate method and get back the results (single layer)...
    model->Evaluate(inputLayer, outputLayer);

    // Output the results (standard range-for instead of the non-standard MSVC `for each ... in`)
    for (auto& value : outputs)
    {
        fprintf(stderr, "%f\n", value);
    }

    return 0;
}

Просмотреть файл

@ -0,0 +1,119 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug_CpuOnly|x64">
<Configuration>Debug_CpuOnly</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release_CpuOnly|x64">
<Configuration>Release_CpuOnly</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{578D52A0-3928-4405-A016-F016E8B49031}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>CPPEvalClient</RootNamespace>
</PropertyGroup>
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<PropertyGroup Condition="$(DebugBuild)" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<UseIntelMKL>No</UseIntelMKL>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
<UseIntelMKL>No</UseIntelMKL>
<UseIntelIPP>false</UseIntelIPP>
</PropertyGroup>
<!--Importing CPP defaults must occur after declaring the desired toolset above
Otherwise, the build may default back to an previous toolset -->
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup>
<!-- TODO intentional for all? -->
<LinkIncremental>false</LinkIncremental>
<TargetName>CPPEvalClient</TargetName>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;UNICODE;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<FloatingPointModel>Fast</FloatingPointModel>
<OpenMPSupport>true</OpenMPSupport>
<TreatWarningAsError>true</TreatWarningAsError>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>%(DelayLoadDLLs)</DelayLoadDLLs>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Optimization>Disabled</Optimization>
<MinimalRebuild>false</MinimalRebuild>
</ClCompile>
<Link />
<ProjectReference>
<LinkLibraryDependencies>false</LinkLibraryDependencies>
</ProjectReference>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
<FloatingPointExceptions>false</FloatingPointExceptions>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
<ProjectReference>
<LinkLibraryDependencies>true</LinkLibraryDependencies>
</ProjectReference>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="CPPEvalClient.cpp" />
<ClCompile Include="stdafx.cpp">
<PrecompiledHeader>Create</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>

Просмотреть файл

@ -0,0 +1,33 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="stdafx.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="targetver.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="stdafx.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="CPPEvalClient.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

Просмотреть файл

@ -0,0 +1,12 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// stdafx.cpp : source file that includes just the standard includes
// CPPEvalClient.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
#include "stdafx.h"
// TODO: reference any additional headers you need in STDAFX.H
// and not in this file

Просмотреть файл

@ -0,0 +1,19 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#pragma once

#include "targetver.h"

#include <stdio.h>
#include <tchar.h>

// This is a windows only application
#include "Windows.h"

Просмотреть файл

@ -0,0 +1,13 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
#include <SDKDDKVer.h>

Просмотреть файл

@ -9,7 +9,6 @@ using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Configuration;
namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
{
@ -63,6 +62,8 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
{
try
{
string outputLayerName;
// The examples assume the executable is running from the data folder
// We switch the current directory to the data folder (assuming the executable is in the <CNTK>/x64/Debug|Release folder
Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\MNIST\Data\");
@ -70,22 +71,22 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
using (var model = new IEvaluateModelManagedF())
{
// Initialize model evaluator
string config = GetFileContents(Path.Combine(Environment.CurrentDirectory, @"..\Config\01_OneHidden.cntk"));
model.Init(config);
// Load model
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @"..\Output\Models\01_OneHidden");
model.CreateNetwork(string.Format("deviceId=-1\nmodelPath=\"{0}\"", modelFilePath));
model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId:-1);
// Generate random input values in the appropriate structure and size
var inputs = GetDictionary("features", 28*28, 255);
var inDims = model.GetNodeDimensions(NodeGroup.nodeInput);
var inputs = GetDictionary(inDims.First().Key, inDims.First().Value, 255);
// We request the output layer names(s) and dimension, we'll use the first one.
var outDims = model.GetNodeDimensions(NodeGroup.nodeOutput);
outputLayerName = outDims.First().Key;
// We can call the evaluate method and get back the results (single layer)...
outputs = model.Evaluate(inputs, "ol.z", 10);
outputs = model.Evaluate(inputs, outputLayerName);
}
OutputResults("ol.z", outputs);
OutputResults(outputLayerName, outputs);
}
catch (CNTKException ex)
{
@ -112,20 +113,20 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
using (var model = new IEvaluateModelManagedF())
{
// Initialize model evaluator
string config = GetFileContents(Path.Combine(Environment.CurrentDirectory, @"..\Config\01_OneHidden.cntk"));
model.Init(config);
// Load model
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @"..\Output\Models\01_OneHidden");
model.CreateNetwork(string.Format("deviceId=-1\nmodelPath=\"{0}\"", modelFilePath));
model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId:-1);
// Generate random input values in the appropriate structure and size
var inputs = GetDictionary("features", 28*28, 255);
var inDims = model.GetNodeDimensions(NodeGroup.nodeInput);
var inputs = GetDictionary(inDims.First().Key, inDims.First().Value, 255);
// We request the output layer names(s) and dimension, we'll use the first one.
var outDims = model.GetNodeDimensions(NodeGroup.nodeOutput);
string outputLayerName = outDims.First().Key;
// We can preallocate the output structure and pass it in (multiple output layers)
outputs = GetDictionary("ol.z", 10, 1);
outputs = GetDictionary(outputLayerName, outDims[outputLayerName], 1);
model.Evaluate(inputs, outputs);
}
@ -154,24 +155,26 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
Environment.CurrentDirectory = initialDirectory;
List<float> outputs;
string outputLayerName;
using (var model = new IEvaluateModelManagedF())
{
// Initialize model evaluator
model.Init("deviceId=-1");
// Create the network
string networkDescription = GetFileContents(Path.Combine(workingDirectory, @"AddOperatorConstant.cntk"));
model.CreateNetwork(networkDescription);
// This network (AddOperatorConstant.cntk) is a simple network consisting of a single binary operator (Plus)
// operating over a single input and a constant
string networkDescription = File.ReadAllText(Path.Combine(workingDirectory, @"AddOperatorConstant.cntk"));
model.CreateNetwork(networkDescription, deviceId:-1);
// Generate random input values in the appropriate structure and size
var inputs = new Dictionary<string, List<float>>() { { "features", new List<float>() { { 1.0f } } } };
// Generate random input value in the appropriate structure and size
var inputs = new Dictionary<string, List<float>>() { { "features", new List<float>() { 1.0f } } };
// We can call the evaluate method and get back the results (single layer)...
outputs = model.Evaluate(inputs, "ol", 1);
var outDims = model.GetNodeDimensions(NodeGroup.nodeOutput);
outputLayerName = outDims.First().Key;
outputs = model.Evaluate(inputs, outputLayerName);
}
OutputResults("ol", outputs);
OutputResults(outputLayerName, outputs);
}
catch (CNTKException ex)
{
@ -199,12 +202,11 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
using (var model = new IEvaluateModelManagedF())
{
// Initialize model evaluator
model.Init("deviceId=-1");
// Create the network
string networkDescription = GetFileContents(Path.Combine(workingDirectory, @"AddOperatorConstantNoInput.cntk"));
model.CreateNetwork(networkDescription);
// This network (AddOperatorConstantNoInput.cntk) is a simple network consisting of a single binary operator (Plus)
// operating over a two constants, therefore no input is necessary.
string networkDescription = File.ReadAllText(Path.Combine(workingDirectory, @"AddOperatorConstantNoInput.cntk"));
model.CreateNetwork(networkDescription, deviceId:-1);
// We can call the evaluate method and get back the results (single layer)...
outputs = model.Evaluate("ol", 1);
@ -273,16 +275,6 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
return dict;
}
/// <summary>
/// Reads the configuration file and returns the contents as a string
/// </summary>
/// <returns>The content of the configuration file</returns>
static string GetFileContents(string filePath)
{
var lines = System.IO.File.ReadAllLines(filePath);
return string.Join("\n", lines);
}
/// <summary>
/// Creates a list of random numbers
/// </summary>

Просмотреть файл

@ -33,6 +33,14 @@ ref class CNTKException;
template<typename ElemType>
using GetEvalProc = void(*)(IEvaluateModel<ElemType>**);
/// Enumeration for the types of nodes a caller can query dimensions for
public enum class NodeGroup
{
    nodeInput,    // an input node
    nodeOutput,   // an output node
    nodeSpecified // nodes explicitly named by the caller in the dimensions map
};
/// Managed wrapper for the native evaluation model
template<typename ElemType>
public ref class IEvaluateModelManaged : IDisposable
@ -110,11 +118,24 @@ public:
}
}
/// <summary>Creates a network based from the network description in the configuration</summary>
/// <param name="networkDescription">The configuration file containing the network description</param>
/// <param name="deviceId">The device ID to specify for the network</param>
/// <exception cref="ObjectDisposedException">Thrown when the evaluator has already been disposed</exception>
void CreateNetwork(String^ networkDescription, int deviceId)
{
    if (m_eval == nullptr)
    {
        throw gcnew ObjectDisposedException("Object has been disposed.");
    }

    // Prepend the device selection to the description, then delegate to the
    // single-argument overload.
    this->CreateNetwork(String::Format("deviceId={0}\n{1}", deviceId, networkDescription));
}
/// <summary>Evaluates the model using a single forward feed pass and retrieves the output layer data</summary>
/// <param name="outputKey"></param>
/// <param name="outputSize"></param>
/// <returns>Results for specified layer</returns>
List<ElemType>^ Evaluate(String^ outputKey, int outputSize)
__declspec(deprecated) List<ElemType>^ Evaluate(String^ outputKey, int outputSize)
{
if (m_eval == nullptr)
{
@ -179,6 +200,75 @@ public:
}
}
/// <summary>Evaluates the model using a single forward feed pass and retrieves the output layer data</summary>
/// <param name="outputKey"></param>
/// <param name="outputSize"></param>
/// <returns>Results for specified layer</returns>
List<ElemType>^ Evaluate(String^ outputKey)
{
if (m_eval == nullptr)
{
throw gcnew ObjectDisposedException("Object has been disposed.");
}
std::map<std::wstring, std::vector<ElemType>*> stdOutputs;
try
{
std::vector<shared_ptr<std::vector<ElemType>>> sharedOutputVectors;
int outputSize = GetNodeDimensions(NodeGroup::nodeOutput)[outputKey];
List<ElemType>^ outputs = gcnew List<ElemType>(outputSize);
for (int i = 0; i < outputSize; i++)
{
outputs->Add(*(gcnew ElemType));
}
Dictionary<String^, List<ElemType>^>^ outputMap = gcnew Dictionary<String^, List<ElemType>^>();
outputMap->Add(outputKey, outputs);
for each (auto item in outputMap)
{
pin_ptr<const WCHAR> key = PtrToStringChars(item.Key);
shared_ptr<std::vector<ElemType>> ptr = CopyList(item.Value);
sharedOutputVectors.push_back(ptr);
stdOutputs.insert(MapEntry(key, ptr.get()));
}
try
{
m_eval->Evaluate(stdOutputs);
}
catch (const exception& ex)
{
throw GetCustomException(ex);
}
auto enumerator = outputMap->Keys->GetEnumerator();
for (auto& map_item : stdOutputs)
{
// Retrieve the layer key
enumerator.MoveNext();
String^ key = enumerator.Current;
std::vector<ElemType> &refVec = *(map_item.second);
int index = 0;
// Copy output to CLI structure
for (auto& vec : refVec)
{
outputMap[key][index++] = vec;
}
}
return outputMap[outputKey];
}
catch (Exception^)
{
throw;
}
}
/// <summary>Evaluates the model against input data and retrieves the output layer data</summary>
/// <param name="inputs"></param>
/// <param name="outputs"></param>
@ -250,7 +340,7 @@ public:
/// <param name="outputKey"></param>
/// <param name="outputSize"></param>
/// <returns>Results for specified layer</returns>
List<ElemType>^ Evaluate(Dictionary<String^, List<ElemType>^>^ inputs, String^ outputKey, int outputSize)
__declspec(deprecated) List<ElemType>^ Evaluate(Dictionary<String^, List<ElemType>^>^ inputs, String^ outputKey, int outputSize)
{
List<ElemType>^ outputs = gcnew List<ElemType>(outputSize);
for (int i = 0; i < outputSize; i++)
@ -266,6 +356,63 @@ public:
return outputMap[outputKey];
}
/// <summary>Evaluates the model against input data and retrieves the desired output layer data</summary>
/// <param name="inputs"></param>
/// <param name="outputKey"></param>
/// <returns>Results for requested layer</returns>
List<ElemType>^ Evaluate(Dictionary<String^, List<ElemType>^>^ inputs, String^ outputKey)
{
auto outDims = GetNodeDimensions(NodeGroup::nodeOutput);
int outputSize = outDims[outputKey];
List<ElemType>^ outputs = gcnew List<ElemType>(outputSize);
for (int i = 0; i < outputSize; i++)
{
outputs->Add(*(gcnew ElemType));
}
Dictionary<String^, List<ElemType>^>^ outputMap = gcnew Dictionary<String^, List<ElemType>^>();
outputMap->Add(outputKey, outputs);
Evaluate(inputs, outputMap);
return outputMap[outputKey];
}
/// <summary>Returns the layer(s) and associated dimensions for the specified node group
/// <param name="nodeGroup">The node type to query for</param>
/// <returns>A dictionary mapping layer names to their dimension</returns>
Dictionary<String^, int>^ GetNodeDimensions(NodeGroup nodeGroup)
{
if (m_eval == nullptr)
{
throw gcnew ObjectDisposedException("Object has been disposed.");
}
std::map<std::wstring, size_t> stdDims;
try
{
Microsoft::MSR::CNTK::NodeGroup gr(GetNodeGroup(nodeGroup));
m_eval->GetNodeDimensions(stdDims, gr);
}
catch (const exception& ex)
{
throw GetCustomException(ex);
}
Dictionary<String^, int>^ dims = gcnew Dictionary<String^, int>();
for (auto& map_item : stdDims)
{
String^ key = gcnew String(map_item.first.c_str());
int dim = static_cast<int>(map_item.second);
dims->Add(key, dim);
}
return dims;
}
~IEvaluateModelManaged()
{
if (m_eval == nullptr)
@ -336,6 +483,23 @@ private:
return gcnew CNTKException(gcnew System::String(ex.what()));
}
}
/// <summary Converts a managed (CLI) enum NodeGroup to a native NodeGroup
/// <param name="nodeGroup">The managed (CLI) NodeGroup to convert to native</param>
Microsoft::MSR::CNTK::NodeGroup GetNodeGroup(NodeGroup nodeGroup)
{
switch ((int)nodeGroup)
{
case Microsoft::MSR::CNTK::NodeGroup::nodeInput:
return Microsoft::MSR::CNTK::NodeGroup::nodeInput;
case Microsoft::MSR::CNTK::NodeGroup::nodeOutput:
return Microsoft::MSR::CNTK::NodeGroup::nodeOutput;
case Microsoft::MSR::CNTK::NodeGroup::nodeSpecified:
return Microsoft::MSR::CNTK::NodeGroup::nodeSpecified;
default:
throw gcnew CNTKRuntimeException(String::Format("Cannot convert native NodeGroup with value: {0} to corresponding managed NodeGroup.",(int)nodeGroup), "");
}
}
};
/// <summary>Managed float-specific model evaluation class</summary>
@ -420,19 +584,35 @@ public:
// explanation to this behavior
void emit()
{
Dictionary<String^, List<float>^>^ nullDictF = nullptr;
Dictionary<String^, List<double>^>^ nullDictD = nullptr;
IEvaluateModelManagedF f;
f.Init("");
f.Evaluate(nullptr, nullptr);
f.Evaluate(nullptr, "", 0);
f.Evaluate("", 0);
f.Evaluate(nullptr, nullDictF);
f.Evaluate(nullptr, "");
f.Evaluate("");
f.CreateNetwork("");
f.CreateNetwork("", 0);
f.GetNodeDimensions(NodeGroup::nodeSpecified);
IEvaluateModelManagedD d;
d.Init("");
d.Evaluate(nullptr, nullptr);
d.Evaluate(nullptr, nullDictD);
d.Evaluate(nullptr, "");
d.Evaluate("");
d.CreateNetwork("");
d.CreateNetwork("", 0);
d.GetNodeDimensions(NodeGroup::nodeSpecified);
// Deprecated code, hush warnings locally only
#pragma warning(push)
#pragma warning(disable: 4996)
f.Evaluate(nullptr, "", 0);
f.Evaluate("", 0);
d.Evaluate(nullptr, "", 0);
d.Evaluate("", 0);
d.CreateNetwork("");
#pragma warning(pop)
}
}}}}}

Просмотреть файл

@ -659,8 +659,8 @@ CPUMatrix<ElemType>& CPUMatrix<ElemType>::DoGatherColumnsOf(ElemType beta, const
#pragma omp parallel for // TODO: Depending in circumstance, it may be more efficient to parallelize over rows.
foreach_column(jOut, us)
{
auto jInF = idx(0, jOut); // this is the column we need to get
if (jInF < 0) // negative index means gap
auto jInF = idx(0, jOut); // this is the column we need to get
if (std::isnan(jInF) || jInF < 0) // negative index means gap
continue;
size_t jIn = (size_t)jInF;
if (jIn >= a.GetNumCols())
@ -691,8 +691,8 @@ CPUMatrix<ElemType>& CPUMatrix<ElemType>::DoScatterColumnsOf(ElemType beta, cons
#pragma omp parallel for // TODO: Depending in circumstance, it may be more efficient to parallelize over rows.
foreach_column(jIn, a)
{
auto jOutF = idx(0, jIn); // this is the column we copy/add into
if (jOutF < 0) // negative index means gap
auto jOutF = idx(0, jIn); // this is the column we copy/add into
if (std::isnan(jOutF) || jOutF < 0) // negative index means gap
continue;
size_t jOut = (size_t)jOutF;
if (jOut >= GetNumCols())
@ -715,11 +715,12 @@ void CPUMatrix<ElemType>::SetValue(const ElemType v)
}
else
{
ElemType* bufPtr = Data();
ElemType* bufPtr = Data();
long m = (long) GetNumElements();
// 2-way thread parallelism is sufficient for the memory bound
// operation of just setting the values of an array.
const unsigned SETVALUE_NUM_THREADS = 2;
UNUSED(SETVALUE_NUM_THREADS); // in case OMP is turned off.
#pragma omp parallel for num_threads(SETVALUE_NUM_THREADS)
// four-way unrolling
for (long i = 0; i < (m & ~3); i += 4)
@ -852,6 +853,26 @@ void CPUMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& deepCopyFrom)
SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), deepCopyFrom.Data(), 0);
}
#if 0
template <class ElemType>
void CPUMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& /*deepCopyFrom*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void CPUMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom)
{
deepCopyFrom.AssignColumnSliceToDense(*this, 0, deepCopyFrom.GetNumCols());
}
template <class ElemType>
void CPUMatrix<ElemType>::SetValue(const GPUSparseMatrix<ElemType>& /*deepCopyFrom*/)
{
NOT_IMPLEMENTED;
}
#endif
template <class ElemType>
void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, ElemType* pArray, const size_t matrixFlags)
{
@ -1093,18 +1114,15 @@ void CPUMatrix<ElemType>::AddGaussianRandomValue(const ElemType mean, const Elem
//maskRate: percentage of values masked out (similar to dropout rate)
//scaleValue: which scale value to set to the left ones (unmasked items).
template <class ElemType>
void CPUMatrix<ElemType>::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed)
void CPUMatrix<ElemType>::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, RNGHandle& rngHandle)
{
if (IsEmpty())
LogicError("SetUniformRandomValue: Matrix is empty.");
CPURNGHandle* cpuRNGHandle = dynamic_cast<CPURNGHandle*>(&rngHandle);
assert(cpuRNGHandle != nullptr);
auto& us = *this;
#ifdef _MSC_VER // TODO: check if available under GCC/Linux
std::ranlux64_base_01 generator;
generator.seed(seed == USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed);
#else
std::default_random_engine generator(seed == USE_TIME_BASED_SEED ? (unsigned long) time(NULL) : seed);
#endif
std::uniform_real_distribution<ElemType> r(0, 1);
long m = (long) GetNumRows(), n = (long) GetNumCols();
@ -1114,19 +1132,19 @@ void CPUMatrix<ElemType>::SetUniformRandomMask(const ElemType maskRate, const El
// four-way unrolling
for (long i = 0; i < (m & ~3); i += 4)
{
v = r(generator);
v = r(cpuRNGHandle->Generator());
us(i, j) = v <= maskRate ? 0 : scaleValue;
v = r(generator);
v = r(cpuRNGHandle->Generator());
us(i + 1, j) = v <= maskRate ? 0 : scaleValue;
v = r(generator);
v = r(cpuRNGHandle->Generator());
us(i + 2, j) = v <= maskRate ? 0 : scaleValue;
v = r(generator);
v = r(cpuRNGHandle->Generator());
us(i + 3, j) = v <= maskRate ? 0 : scaleValue;
}
// handle remaining stuffs
for (long i = m & ~3; i < m; i++)
{
v = r(generator);
v = r(cpuRNGHandle->Generator());
us(i, j) = v <= maskRate ? 0 : scaleValue;
}
}
@ -1365,7 +1383,6 @@ void CPUMatrix<ElemType>::RequireSize(const size_t numRows, const size_t numCols
// Resize() -- change matrix size
// This function is cheap if the matrix size does not change.
// Current content is not preserved.
// BUGBUG: There is code that relies on zero initialization (without, we get subtle variations of output). That is wrong--we should initialize to QNaN and see where it fails.
// If growOnly is true, resize will not reallocate memory if the current memory is large enough (i.e., will not shrink).
// If this object does not own its memory then new memory cannot be allocated (one can still shrink and/or reshape).
template <class ElemType>
@ -1394,8 +1411,9 @@ void CPUMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, boo
}
// success
m_numRows = numRows;
m_numCols = numCols;
m_sliceViewOffset = 0;
m_numRows = numRows;
m_numCols = numCols;
}
// allocated by the callee but should be deleted by the caller
@ -6290,6 +6308,10 @@ template CPUMatrix<char>& CPUMatrix<char>::operator=(CPUMatrix<char>&&);
template void CPUMatrix<char>::SetValue(const char);
template void CPUMatrix<char>::SetValue(const size_t numRows, const size_t numCols, char* pArray, size_t matrixFlags);
template void CPUMatrix<char>::SetValue(CPUMatrix<char> const&);
//template void CPUMatrix<char>::SetValue(GPUMatrix<char> const&);
//template void CPUMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
//template void CPUMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
template void CPUMatrix<char>::RequireSize(const size_t numRows, const size_t numCols, bool growOnly);
template void CPUMatrix<char>::Resize(const size_t numRows, const size_t numCols, bool growOnly);
template CPUMatrix<int>::CPUMatrix(const size_t, const size_t, int*, const size_t);

Просмотреть файл

@ -8,11 +8,16 @@
#include "File.h"
#include "Helpers.h"
#include "CommonMatrix.h"
#include "CPURNGHandle.h"
#include <vector>
#include <stdio.h>
#include <ctime>
#include <limits.h>
//#include "GPUMatrix.h"
//#include "CPUSparseMatrix.h"
//#include "GPUSparseMatrix.h"
// NOTE NOTE NOTE:
// use CPUSingleMatrix and CPUDoubleMatrix instead of using the template directly
///////////////////////////////////////////////
@ -127,6 +132,9 @@ public:
void SetValue(const ElemType v);
void SetValue(const CPUMatrix<ElemType>& deepCopyFrom);
//void SetValue(const GPUMatrix<ElemType>& deepCopyFrom);
//void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
//void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const size_t numRows, const size_t numCols, ElemType* pArray, size_t matrixFlags = matrixFlagNormal);
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);
@ -139,7 +147,7 @@ public:
void SetDiagonalValue(const CPUMatrix<ElemType>& vector);
void SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed = USE_TIME_BASED_SEED);
void SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed = USE_TIME_BASED_SEED);
void SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed = USE_TIME_BASED_SEED);
void SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, RNGHandle& rngHandle);
void AddGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed = USE_TIME_BASED_SEED);
CPUMatrix<ElemType> Transpose();

Просмотреть файл

@ -0,0 +1,24 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// CPUMatrix.cpp : full implementation of all matrix functions on the CPU side
//
#include "stdafx.h"
#include "CPURNGHandle.h"
namespace Microsoft { namespace MSR { namespace CNTK {
CPURNGHandle::CPURNGHandle(int deviceId, unsigned long seed)
: RNGHandle(deviceId)
{
#ifdef _MSC_VER // TODO: check if available under GCC/Linux
m_generator.reset(new std::ranlux64_base_01());
m_generator->seed(seed);
#else
m_generator.reset(new std::default_random_engine(seed));
#endif
}
}}}

Просмотреть файл

@ -0,0 +1,42 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// CPUMatrix.cpp : full implementation of all matrix functions on the CPU side
//
#pragma once
#include "RNGHandle.h"
#include <memory>
#include <random>
namespace Microsoft { namespace MSR { namespace CNTK {
class CPURNGHandle : public RNGHandle
{
public:
CPURNGHandle(int deviceId, unsigned long seed);
#ifdef _MSC_VER // TODO: check if available under GCC/Linux
std::ranlux64_base_01& Generator()
{
return *m_generator;
}
private:
std::unique_ptr<std::ranlux64_base_01> m_generator;
#else
std::default_random_engine& Generator()
{
return *m_generator;
}
private:
std::unique_ptr<std::default_random_engine> m_generator;
#endif
};
}}}

Просмотреть файл

@ -264,8 +264,36 @@ void CPUSparseMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& v)
memcpy(RowLocation(), v.RowLocation(), v.RowSize());
memcpy(ColLocation(), v.ColLocation(), v.ColSize());
}
if (v.m_sliceViewOffset > 0)
{
CPUSPARSE_INDEX_TYPE* loc = (GetFormat() == matrixFormatSparseCSC) ? ColLocation() : RowLocation();
size_t len = (GetFormat() == matrixFormatSparseCSC) ? ColSize() : RowSize();
CPUSPARSE_INDEX_TYPE offset = loc[0];
for (size_t c = 0; c < len; c++)
loc[c] -= offset;
}
}
#if 0
template <class ElemType>
void CPUSparseMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& /*v*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void CPUSparseMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& /*v*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void CPUSparseMatrix<ElemType>::SetValue(const GPUSparseMatrix<ElemType>& /*v*/)
{
NOT_IMPLEMENTED;
}
#endif
template <class ElemType>
void CPUSparseMatrix<ElemType>::MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val)
{
@ -392,7 +420,7 @@ CPUSparseMatrix<ElemType> CPUSparseMatrix<ElemType>::ColumnSlice(size_t startCol
}
template <class ElemType>
CPUMatrix<ElemType> CPUSparseMatrix<ElemType>::CopyColumnSliceToDense(size_t startColumn, size_t numCols) const
void CPUSparseMatrix<ElemType>::AssignColumnSliceToDense(CPUMatrix<ElemType>& slice, size_t startColumn, size_t numCols) const
{
if (startColumn + numCols > m_numCols)
InvalidArgument("The slice (%d+%d) is out of range of the source matrix (%d).", (int) startColumn, (int) numCols, (int) m_numCols);
@ -400,8 +428,10 @@ CPUMatrix<ElemType> CPUSparseMatrix<ElemType>::CopyColumnSliceToDense(size_t sta
if (GetFormat() != MatrixFormat::matrixFormatSparseCSC)
NOT_IMPLEMENTED;
CPUMatrix<ElemType> slice(m_numRows, numCols);
// We can either error out or RequireSize. Because RequireSize will error out if it's not allowed, I think this makes more sense.
slice.RequireSize(m_numRows, numCols);
memset(slice.Data(), 0, sizeof(ElemType) * slice.GetNumElements());
#pragma omp parallel for
for (long j = 0; j < numCols; j++)
{
@ -416,6 +446,14 @@ CPUMatrix<ElemType> CPUSparseMatrix<ElemType>::CopyColumnSliceToDense(size_t sta
}
}
}
template <class ElemType>
CPUMatrix<ElemType> CPUSparseMatrix<ElemType>::CopyColumnSliceToDense(size_t startColumn, size_t numCols) const
{
CPUMatrix<ElemType> slice(m_numRows, numCols);
AssignColumnSliceToDense(slice, startColumn, numCols);
return slice;
}
@ -1339,15 +1377,20 @@ template CPUSparseMatrix<char>::CPUSparseMatrix(CPUSparseMatrix<char> const&);
template CPUSparseMatrix<char>::CPUSparseMatrix(CPUSparseMatrix<char>&&);
template CPUSparseMatrix<char>& CPUSparseMatrix<char>::operator=(CPUSparseMatrix<char>&& moveFrom);
template void CPUSparseMatrix<char>::SetValue(size_t, size_t, char);
//template void CPUSparseMatrix<char>::SetValue(CPUMatrix<char> const&);
//template void CPUSparseMatrix<char>::SetValue(GPUMatrix<char> const&);
template void CPUSparseMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
//template void CPUSparseMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
template char* CPUSparseMatrix<char>::Data() const;
template char* CPUSparseMatrix<char>::Data();
template void CPUSparseMatrix<char>::Reset(void);
template void CPUSparseMatrix<char>::Resize(const size_t, const size_t, const size_t, const bool);
template void CPUSparseMatrix<char>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, bool);
template void CPUSparseMatrix<char>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const MatrixFormat, const bool, bool);
template CPUSparseMatrix<char>::~CPUSparseMatrix();
template CPUSparseMatrix<char> CPUSparseMatrix<char>::ColumnSlice(size_t startColumn, size_t numCols) const;
template CPUMatrix<char> CPUSparseMatrix<char>::CopyColumnSliceToDense(size_t startColumn, size_t numCols) const;
template void CPUSparseMatrix<char>::AssignColumnSliceToDense(CPUMatrix<char>&, size_t startColumn, size_t numCols) const;
template CPUSparseMatrix<char>& CPUSparseMatrix<char>::operator=(const CPUSparseMatrix<char>& deepCopyFrom);
template CPUSparseMatrix<int>::CPUSparseMatrix(const MatrixFormat, const size_t, const size_t, const size_t);

Просмотреть файл

@ -6,6 +6,8 @@
#include <stdio.h>
#include "CPUMatrix.h"
//#include "GPUMatrix.h"
//#include "GPUSparseMatrix.h"
#include <map>
#include <unordered_map>
@ -82,7 +84,11 @@ public:
public:
void SetValue(const size_t row, const size_t col, ElemType val);
//void SetValue(const CPUMatrix<ElemType>& /*val*/);
//void SetValue(const GPUMatrix<ElemType>& /*val*/);
void SetValue(const CPUSparseMatrix<ElemType>& /*val*/);
//void SetValue(const GPUSparseMatrix<ElemType>& /*val*/);
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);
size_t BufferSize() const
@ -98,6 +104,7 @@ public:
CPUSparseMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
CPUMatrix<ElemType> CopyColumnSliceToDense(size_t startColumn, size_t numCols) const;
void AssignColumnSliceToDense(CPUMatrix<ElemType>& slice, size_t startColumn, size_t numCols) const;
CPUMatrix<ElemType> DiagonalToDense() const;

Просмотреть файл

@ -175,7 +175,7 @@ __global__ void kComputeBatchMeanAndInvStdDev(int vectorSize, int batchSize, con
assert(gridDim.y == 1);
assert(gridDim.z == 1);
assert(::isfinite(epsilon) && epsilon > 0);
assert(::isfinite(expAvgFactor) && expAvgFactor > 0);
assert(::isfinite(expAvgFactor) && expAvgFactor >= 0);
int irowSrcBase = (blockIdx.x * BlockDimX + threadIdx.x) * U;
if (irowSrcBase >= vectorSize)

Просмотреть файл

@ -269,4 +269,4 @@ __global__ void kAveragePoolingBackward(int batchSize, const int* mpRowCol, cons
}
}
} } }
}}}

Просмотреть файл

@ -312,7 +312,7 @@ protected:
if (in.GetMatrixType() == MatrixType::DENSE || m_gpuSparse1D)
inputSubBatch = in.ColumnSlice(startSampleId, smallBatchSize);
else
inputSubBatch.SetValue(in.ColumnSlice(startSampleId, smallBatchSize), in.GetFormat());
inputSubBatch.SetValue(in.ColumnSlice(startSampleId, smallBatchSize));
if (m_gpuSparseOpt)
{

Просмотреть файл

@ -313,10 +313,12 @@ private:
template <typename TAlgo, typename TFinder, typename TStaticFinder>
void FindBestAlgo(size_t batchSize, TAlgo& algo, TFinder finder, TStaticFinder staticFinder)
{
if (!algo.NeedAutotuning(batchSize))
return;
m_inT.UpdateBatchSize(batchSize);
m_outT.UpdateBatchSize(batchSize);
if (!algo.NeedAutotuning(batchSize))
return;
using CuDnnAlgoT = decltype(TAlgo::Algo);
CuDnnAlgoT algoPerf[MaxAlgoCount];
int calgo = 0;
@ -327,7 +329,7 @@ private:
{
decltype(CuDnnAlgoT::algo) noMemAlgo;
CUDNN_CALL(staticFinder(noMemAlgo));
algo.CurMBSize = batchSize;
algo.MaxAllowedMBSizeForCurrentAlgo = batchSize;
algo.Algo = algoPerf[0];
algo.Algo.algo = noMemAlgo;
algo.Algo.memory = 0;
@ -347,7 +349,7 @@ private:
});
if (res == algoPerf + calgo)
RuntimeError("cuDNN could not find suitable algorithm for the current convolution configuration.");
algo.CurMBSize = batchSize;
algo.MaxAllowedMBSizeForCurrentAlgo = batchSize;
algo.Algo = *res;
// Find fastest algorithm that does NOT require workspace. It is used as a fallback algo in Forward function.
res = std::find_if(algoPerf, algoPerf + calgo,
@ -380,13 +382,14 @@ private:
using CuDnnAlgoT = decltype(T::algo);
ConvAlgoInfo()
: CurMBSize(0)
: MaxAllowedMBSizeForCurrentAlgo(0)
{
Algo.status = CUDNN_STATUS_NOT_INITIALIZED;
NoWorkspaceAlgo = (CuDnnAlgoT)-1;
}
// Current mini-batch size, needed for re-computing statistics in auto-tuner.
size_t CurMBSize;
size_t MaxAllowedMBSizeForCurrentAlgo;
T Algo;
CuDnnAlgoT NoWorkspaceAlgo;
@ -399,7 +402,7 @@ private:
// We also need to reset auto-tuning status at the beginning of each epoch but ComputationNode currently does not provide such notification.
// We assume no other dimensions of tensors can change so we don't check it.
// REVIEW alexeyk: review once we get response from NVIDIA.
return (Algo.status != CUDNN_STATUS_SUCCESS || batchSize > CurMBSize);
return (Algo.status != CUDNN_STATUS_SUCCESS || batchSize > MaxAllowedMBSizeForCurrentAlgo);
}
};
@ -432,7 +435,8 @@ bool CuDnnConvolutionEngineFactory<ElemType>::IsSupported(DEVICEID_TYPE deviceId
// REVIEW alexeyk: IsSupported check should be performed by cuDNN itself. Is there a good way to do that?
cudaDeviceProp props = {0};
if (cudaGetDeviceProperties(&props, deviceId) != cudaSuccess || props.major < 3)
// Note that cudaGetDeviceProperties also sets CUDA last error so need to check/clear both.
if (deviceId < 0 || (cudaGetDeviceProperties(&props, deviceId) | cudaGetLastError()) != cudaSuccess || props.major < 3)
return false;
const auto& input = geometry->InputShape();

Просмотреть файл

@ -11,7 +11,7 @@
#include "GPUMatrix.h"
#include "GPUMatrixCUDAKernels.cuh"
#include "GPUSparseMatrix.h"
//#include "GPUSparseMatrix.h"
#include "GPUTensor.h"
#include "CommonMatrix.h"
#define TENSOR_OPS_DECL __device__ __host__
@ -450,7 +450,7 @@ template <class ElemType>
GPUMatrix<ElemType>::GPUMatrix(const GPUMatrix<ElemType>& deepCopyFrom)
{
ZeroInit();
SetValue(deepCopyFrom);
SetValue(deepCopyFrom);
}
template <class ElemType>
@ -886,11 +886,11 @@ __global__ void _doGatherColumnsOf(ElemType* us, size_t usStride, const ElemType
CUDA_LONG jOut = id / usStride; // col index into 'us' and 'idx'
auto jInF = idx[jOut * idxStride]; // this is the column we need to get
if (jInF < 0) // negative index means gap
if (::isnan(jInF) || jInF < 0) // negative index means gap
return;
size_t jIn = (size_t)jInF;
if (jIn >= aCols)
return; // actually a failure
//if (jIn >= aCols)
// return; // actually a failure
const ElemType& ra = a[ i + jIn * aStride ];
ElemType& rus = us[id/*i + jOut * usStride*/];
@ -929,6 +929,21 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::DoGatherColumnsOf(ElemType beta, const
return *this;
}
// little helper for debugging
template <class ElemType>
static void Peek(const GPUMatrix<ElemType>& m, const char* which)
{
size_t rows = m.GetNumRows();
size_t cols = m.GetNumCols();
ElemType buf[10000] = { 0 };
size_t n = min(rows * cols, _countof(buf));
CUDA_CALL(cudaMemcpy(buf, m.Data(), sizeof(ElemType) * n, cudaMemcpyDeviceToHost));
UNUSED(which); UNUSED(rows); UNUSED(cols); sin(1.0f); // set breakpoint here
//CUDA_CALL(cudaMemcpy(const_cast<ElemType*>(m.Data()), buf, sizeof(ElemType) * n, cudaMemcpyHostToDevice));
}
#define ALLOW_ATOMIC_SCATTER // allow to disable this, until we know atomicAdd() works properly here
template <class ElemType>
__global__ void _doScatterColumnsOf(ElemType* us, size_t usStride, size_t usCols, const ElemType* idx, size_t idxStride, const ElemType* a, size_t aStride, const ElemType alpha, CUDA_LONG numElements)
{
@ -941,35 +956,26 @@ __global__ void _doScatterColumnsOf(ElemType* us, size_t usStride, size_t usCols
CUDA_LONG i = id % aStride; // row index into 'a' and 'us'
CUDA_LONG jIn = id / aStride; // col index into 'a' and 'idx'
auto jOutF = idx[jIn * idxStride]; // this is the column we copy/add into
if (jOutF < 0) // negative index means gap
auto jOutF = idx[jIn * idxStride]; // this is the column we copy/add into
if (::isnan(jOutF) || jOutF < 0) // negative index means gap
return;
size_t jOut = (size_t)jOutF;
if (jOut >= usCols)
return; // actually a failure --TODO: This should not be necessary. Why is it?
//if (jOut >= usCols)
// return; // actually a failure --TODO: This should not be necessary. Why is it?
const ElemType& ra = a[id/*i + jIn * aStride*/];
ElemType& rus = us[ i + jOut * usStride ];
ElemType res = ra * alpha;
if (res != 0) // avoid memory conflict if e.g. an entire column has no gradient
#ifdef ALLOW_ATOMIC_SCATTER
atomicAdd(&rus, res); // rus += res;
#else
rus += res;
#endif
// Note: atomicAdd() is supposed to be fast in case of no conflict (the simple case of Scatter())
}
// little helper for debugging
template <class ElemType>
static void Peek(const GPUMatrix<ElemType>& m, const char* which)
{
size_t rows = m.GetNumRows();
size_t cols = m.GetNumCols();
ElemType buf[10000] = { 0 };
size_t n = min(rows * cols, _countof(buf));
CUDA_CALL(cudaMemcpy(buf, m.Data(), sizeof(ElemType) * n, cudaMemcpyDeviceToHost));
UNUSED(which); UNUSED(rows); UNUSED(cols); sin(1.0f); // set breakpoint here
//CUDA_CALL(cudaMemcpy(const_cast<ElemType*>(m.Data()), buf, sizeof(ElemType) * n, cudaMemcpyHostToDevice));
}
// *this[:,idx[j]] = a[:,j] * alpha + *this[:,idx[j]] * beta
template <class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::DoScatterColumnsOf(ElemType beta, const GPUMatrix<ElemType>& idx, const GPUMatrix<ElemType>& a, ElemType alpha)
@ -987,6 +993,27 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::DoScatterColumnsOf(ElemType beta, cons
auto& us = *this;
#ifndef ALLOW_ATOMIC_SCATTER // verify that atomicAdd is not needed --this is not efficient
{
vector<ElemType> buf(idx.GetNumRows() * idx.GetNumCols()); // idx(,)are the column(s) we copy/add into
CUDA_CALL(cudaMemcpy(buf.data(), idx.Data(), sizeof(ElemType) * buf.size(), cudaMemcpyDeviceToHost));
vector<bool> writtenTo(GetNumCols(), false); // remember whether an output column is in fact a target
for (size_t i = 0; i < buf.size(); i++)
{
auto colF = buf[i];
if (std::isnan(colF) || colF < 0)
continue;
size_t col = (size_t)colF;
if (col >= GetNumCols())
LogicError("DoScatterColumnsOf: Index value out of bounds.");
if (writtenTo[col])
LogicError("DoScatterColumnsOf: #ifndef ALLOW_ATOMIC_SCATTER then columns must be unique. Column idx(%d,%d)=%d is used twice.", (int)(i % idx.GetNumCols()), (int)(i / idx.GetNumCols()), (int)col);
else
writtenTo[col] = true;
}
}
#endif
// pre-scale with beta upfront
// Scatter may add more than one source column to the same target, so we must pre-scale with beta, and then just keep adding.
Scale(beta, us); // if beta is 0, then this will be a memset()
@ -1091,9 +1118,29 @@ void GPUMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& deepCopyFrom)
if (this == &deepCopyFrom)
return;
SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), deepCopyFrom.GetComputeDeviceId(), deepCopyFrom.Data(), matrixFlagSetValueOnDevice);
SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), deepCopyFrom.GetComputeDeviceId(), deepCopyFrom.Data(), matrixFlagSetValueOnDevice);
}
#if 0
template <class ElemType>
void GPUMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& /*deepCopyFrom*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void GPUMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& /*deepCopyFrom*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void GPUMatrix<ElemType>::SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom)
{
deepCopyFrom.CopyToDenseMatrix(*this);
}
#endif
template <class ElemType>
void GPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, size_t matrixFlags)
{
@ -1225,21 +1272,22 @@ void GPUMatrix<ElemType>::SetGaussianRandomValue(const ElemType mean, const Elem
//maskRate: percentage of values masked out (similar to dropout rate)
//scaleValue: which scale value to set to the left ones (unmasked items).
template <class ElemType>
void GPUMatrix<ElemType>::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed)
void GPUMatrix<ElemType>::SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, RNGHandle& rngHandle)
{
PrepareDevice();
CreateCurandObject(seed, __FUNCTION__); // TODO call ResetCurandObject() instead?
GPURNGHandle* gpuRNGHandle = dynamic_cast<GPURNGHandle*>(&rngHandle);
assert(gpuRNGHandle != nullptr);
cudaEvent_t done = nullptr;
CUDA_CALL(cudaEventCreate(&done)); // TODO: why not condition on do_sync, so that we can use SyncGuard?
if (sizeof(ElemType) == sizeof(float))
CURAND_CALL(curandGenerateUniform((((curandGenerator_t*) s_curandGenerator)[0]), reinterpret_cast<float*>(Data()), GetNumElements()));
CURAND_CALL(curandGenerateUniform(gpuRNGHandle->Generator(), reinterpret_cast<float*>(Data()), GetNumElements()));
else
CURAND_CALL(curandGenerateUniformDouble((((curandGenerator_t*) s_curandGenerator)[0]), reinterpret_cast<double*>(Data()), GetNumElements()));
CURAND_CALL(curandGenerateUniformDouble(gpuRNGHandle->Generator(), reinterpret_cast<double*>(Data()), GetNumElements()));
CUDA_CALL(cudaEventRecord(done));
CUDA_CALL(cudaEventSynchronize(done));
CUDA_CALL(cudaEventDestroy(done));
// CURAND_CALL(curandDestroyGenerator(gen));
size_t N = GetNumElements();
size_t blocksPerGrid = (size_t) ceil(N / (double) GridDim::maxThreadsPerBlock);
@ -1420,29 +1468,27 @@ void GPUMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, boo
if (GetNumRows() == numRows && GetNumCols() == numCols)
return;
size_t numElements = numRows * numCols;
if (numElements > GetSizeAllocated() || // grow allocation
(!growOnly && numElements != GetSizeAllocated())) // shrink allocation if not growOnly
{
// reallocate buffer if numElements > 0
ElemType* pArray = nullptr;
if (numElements > 0)
pArray = TracingGPUMemoryAllocator::Allocate<ElemType>(GetComputeDeviceId(), numRows, numCols);
// If the buffer exists, free it
if (Buffer())
TracingGPUMemoryAllocator::Free<ElemType>(GetComputeDeviceId(), Buffer());
SetBuffer(pArray, numElements * sizeof(ElemType));
SetSizeAllocated(numElements);
}
// success
m_sliceViewOffset = 0;
m_numRows = numRows;
m_numCols = numCols;
size_t numElements = GetNumElements();
if (numElements > GetSizeAllocated() || (!growOnly && numElements != GetSizeAllocated()))
{
if (IsEmpty())
{
SetSizeAllocated(0);
SetBuffer(nullptr, 0);
}
else
{
if (Buffer())
{
TracingGPUMemoryAllocator::Free<ElemType>(GetComputeDeviceId(), Buffer());
}
SetSizeAllocated(numElements);
SetBuffer(TracingGPUMemoryAllocator::Allocate<ElemType>(GetComputeDeviceId(), m_numRows, m_numCols), numElements * sizeof(ElemType));
CUDA_CALL(cudaMemset(Buffer(), 0, sizeof(ElemType) * GetSizeAllocated()));
}
}
m_sliceViewOffset = 0;
}
template <class ElemType>
@ -2711,7 +2757,7 @@ void GPUMatrix<ElemType>::VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<E
reinterpret_cast<uint8_t*&>(inIdx) += sizeof(uint64_t) - cbAlign;
outIdx = inIdx + celt;
void* ptmp = outIdx + celt;
assert(reinterpret_cast<ElemType*>(reinterpret_cast<uint8_t*>(ptmp) + cbtemp) <= workspace->Data()+ workspace->GetNumElements());
assert(reinterpret_cast<ElemType*>(reinterpret_cast<uint8_t*>(ptmp) + cbtemp) <= workspace->Data() + workspace->GetNumElements());
// Initialize indices.
const int ThreadsPerBlock = 128;
@ -4390,7 +4436,10 @@ template GPUMatrix<char>& GPUMatrix<char>::operator=(GPUMatrix<char>&&);
template GPUMatrix<char>::GPUMatrix(int);
template void GPUMatrix<char>::SetValue(const char);
template void GPUMatrix<char>::SetValue(const size_t numRows, const size_t numCols, int deviceId, char* pArray, size_t matrixFlags);
//template void GPUMatrix<char>::SetValue(CPUMatrix<char> const&);
template void GPUMatrix<char>::SetValue(GPUMatrix<char> const&);
//template void GPUMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
//template void GPUMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
template GPUMatrix<int>::GPUMatrix(const size_t, const size_t, int, int*, const size_t);
template GPUMatrix<int>::~GPUMatrix();

Просмотреть файл

@ -11,6 +11,7 @@
#include "TensorShape.h" // only for SmallVector; I was hoping to keep this out
#include "BestGpu.h" // for CPUONLY macro
#include "ConcStack.h"
#include "GPURNGHandle.h"
#include <string>
#include <vector>
#include <array>
@ -19,6 +20,10 @@
#include <memory> // for unique_ptr
#include <limits.h> // for ULONG_MAX
//#include "CPUMatrix.h"
//#include "CPUSparseMatrix.h"
//#include "GPUSparseMatrix.h"
#ifndef _WIN32
#include <unistd.h>
#endif
@ -225,14 +230,17 @@ public:
void MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val);
//void SetValue(const CPUMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUMatrix<ElemType>& deepCopyFrom);
//void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
//void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, size_t matrixFlags = matrixFlagNormal);
void SetDiagonalValue(const ElemType v);
void SetDiagonalValue(const GPUMatrix<ElemType>& vector);
void SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed = USE_TIME_BASED_SEED);
void SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed = USE_TIME_BASED_SEED);
void SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, unsigned long seed = USE_TIME_BASED_SEED);
void SetUniformRandomMask(const ElemType maskRate, const ElemType scaleValue, RNGHandle& rngHandle);
GPUMatrix<ElemType> Transpose() const;
GPUMatrix<ElemType>& AssignTransposeOf(const GPUMatrix<ElemType>& a);
@ -642,7 +650,10 @@ public:
{
m_done = nullptr;
if (DoSync())
{
CUDA_CALL(cudaGetLastError());
CUDA_CALL(cudaEventCreate(&m_done));
}
}
~SyncGuard()
{

Просмотреть файл

@ -47,6 +47,7 @@
// NVIDIA should fix their CUDA 8.0 headers
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
// CUDA atomicAdd() only exists for 'float'. This is the 'double' version.
// TODO: This may need to be guarded by CUDA version; newer devices may support this.
static __inline__ __device__ double atomicAdd(double* address, double val)
{
unsigned long long int* address_as_ull = (unsigned long long int*) address;
@ -100,7 +101,7 @@ static INT CeilDiv(INT a, INT2 b) // ceil(a/b)
struct GridDim
{
static const CUDA_LONG maxThreadsPerBlock = 512; // use this many threads per block
static const CUDA_LONG maxWarpsPerBlock = 16; // use this many warps per block
static const CUDA_LONG maxWarpsPerBlock = 16; // use this many warps per block. This means 512 threads for warpSize=32
// use these for launching
// GridDim grid(NN);
@ -123,6 +124,7 @@ struct GridDim
CUDA_LONG warpsPerProc = CeilDiv(N, numProcs * warpSize);
// if too many warps per block then reduce #warps
// This limits the number of threads to 512.
if (warpsPerProc > maxWarpsPerBlock)
{
CUDA_LONG overBy = CeilDiv(warpsPerProc, maxWarpsPerBlock); // we are over by this factor
@ -130,7 +132,7 @@ struct GridDim
}
// put it back together
m_threadsPerBlock = warpsPerProc * warpSize;
m_threadsPerBlock = warpsPerProc * warpSize; // =a multiple of 32 that is as close to 512 as makes sense given NN
m_blocksPerGrid = CeilDiv(N, m_threadsPerBlock);
if (m_blocksPerGrid == 1)
m_threadsPerBlock = N; // don't launch more than necessary --TODO: Does this make a difference at all?
@ -151,13 +153,18 @@ struct GridDim
return props;
}
static size_t GetCurrentDeviceId()
{
int deviceId;
cudaGetDevice(&deviceId);
return (size_t)deviceId;
}
// get device properties of current device
static const cudaDeviceProp& GetDeviceProps()
{
static std::vector<cudaDeviceProp> props = CacheDeviceProps(); // thread-safe according to C++ standard
int deviceId;
cudaGetDevice(&deviceId);
return props[deviceId];
return props[GetCurrentDeviceId()];
}
// compute our location on the grid
@ -3157,7 +3164,8 @@ __global__ void _scaleSparseBlockAndAddToDense(
rhs[IDX2C(row, col, numRows)] += alpha * lhsValues[index];
}
// compute predictions in cross entory node
#if 0
// compute predictions in cross entropy node
template <class ElemType>
__global__ void _computePrediction(
int nv,
@ -3340,6 +3348,7 @@ __global__ void _computeGradientOfInput(
atomicAdd(&grd[IDX2C(h, j, numrows)], sum);
}
#endif
template <class ElemType>
__global__ void computeNCEForwardProp(
@ -3718,6 +3727,8 @@ __global__ void _assignNceDerivativeNew(
atomicAdd(&c[wid], -er);
}
}
#if 0
// compute gradients of weights in cross entropy node
template <class ElemType>
__global__ void _computeGradientOfWeight(
@ -3779,6 +3790,7 @@ __global__ void _computeGradientOfWeight(
blockIds[ii] = i;
}
}
#endif
// used in clipping gradients
template <class ElemType>

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше