Merge branch 'master' into qiwye/asgd-dev

Conflicts:
	Source/ActionsLib/ActionsLib.vcxproj
	Source/Readers/HTKMLFReader/utterancesourcemulti.h
	Source/SGDLib/SGDLib.vcxproj
This commit is contained in:
Qiwei Ye 2016-04-28 13:56:19 +08:00
Родитель 7ce849065a c34e358c2a
Коммит 679b55df50
292 изменённых файлов: 63464 добавлений и 38091 удалений

4
.gitattributes поставляемый
Просмотреть файл

@ -9,6 +9,7 @@ Dockerfile-GPU text
*.post text
*.cpu text
*.gpu text
*.rst text
.gitattributes text
.gitignore text
@ -75,6 +76,9 @@ mean.363 text
var.363 text
prior.132 text
# dot (graph description language) file
*.dot text
# AMI-specific
Results text
40fbank.conf text

Просмотреть файл

@ -47,4 +47,52 @@
<CudaLibPath>$(CudaPath)\lib\$(Platform)</CudaLibPath>
</PropertyGroup>
<!-- TODO warn if ConfigurationType not (yet) defined -->
<PropertyGroup Condition="'$(ConfigurationType)' == 'StaticLibrary'">
<UseDebugLibraries>$(DebugBuild)</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<WholeProgramOptimization>$(ReleaseBuild)</WholeProgramOptimization>
<LinkIncremental>$(DebugBuild)</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(ConfigurationType)' == 'StaticLibrary'">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<TreatWarningAsError>true</TreatWarningAsError>
<SDLCheck>true</SDLCheck>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(ConfigurationType)' == 'StaticLibrary' And $(DebugBuild)">
<ClCompile>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<StackReserveSize>100000000</StackReserveSize>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(ConfigurationType)' == 'StaticLibrary' And $(ReleaseBuild)">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
</Project>

118
CNTK.sln
Просмотреть файл

@ -7,8 +7,10 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTK", "Source\CNTK\CNTK.vc
ProjectSection(ProjectDependencies) = postProject
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {928ABD1B-4D3B-4017-AEF1-0FA1B4467513}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
{EB2BE26F-6BD4-4274-971F-86D080779DD1} = {EB2BE26F-6BD4-4274-971F-86D080779DD1}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tests", "Tests", "{D45DF403-6781-444E-B654-A96868C5BE68}"
@ -129,15 +131,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "LSTM", "LSTM", "{19EE975B-2
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ComputationNetworkLib", "Source\ComputationNetworkLib\ComputationNetworkLib.vcxproj", "{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SGDLib", "Source\SGDLib\SGDLib.vcxproj", "{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}"
ProjectSection(ProjectDependencies) = postProject
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {928ABD1B-4D3B-4017-AEF1-0FA1B4467513}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelTraining", "ParallelTraining", "{5E666C53-2D82-49C9-9127-3FDDC321C741}"
ProjectSection(SolutionItems) = preProject
@ -278,14 +273,10 @@ EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MathTests", "Tests\UnitTests\MathTests\MathTests.vcxproj", "{4701E678-5E6F-470D-B348-9CD1A2C095D1}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ActionsLib", "Source\ActionsLib\ActionsLib.vcxproj", "{EB2BE26F-6BD4-4274-971F-86D080779DD1}"
ProjectSection(ProjectDependencies) = postProject
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {928ABD1B-4D3B-4017-AEF1-0FA1B4467513}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SequenceTraining", "SequenceTraining", "{BB8B9FC5-C4B3-477F-80E2-665DC8E431BD}"
ProjectSection(SolutionItems) = preProject
@ -363,6 +354,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ReaderTests", "Tests\UnitTe
{9BD0A711-0BBD-45B6-B81C-053F03C26CFB} = {9BD0A711-0BBD-45B6-B81C-053F03C26CFB}
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33D2FD22-DEF2-4507-A58A-368F641AEBE5}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
{91973E60-A7BE-4C86-8FDB-59C88A0B3715} = {91973E60-A7BE-4C86-8FDB-59C88A0B3715}
{7B7A51ED-AA8E-4660-A805-D50235A02120} = {7B7A51ED-AA8E-4660-A805-D50235A02120}
{E6646FFE-3588-4276-8A15-8D65C22711C1} = {E6646FFE-3588-4276-8A15-8D65C22711C1}
@ -373,10 +365,12 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "EvalDll", "Source\EvalDll\E
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {928ABD1B-4D3B-4017-AEF1-0FA1B4467513}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{EB2BE26F-6BD4-4274-971F-86D080779DD1} = {EB2BE26F-6BD4-4274-971F-86D080779DD1}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Math", "Source\Math\Math.vcxproj", "{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}"
ProjectSection(ProjectDependencies) = postProject
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
{B3DD765E-694E-4494-BAD7-37BBF2942517} = {B3DD765E-694E-4494-BAD7-37BBF2942517}
EndProjectSection
EndProject
@ -385,46 +379,55 @@ EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibSVMBinaryReader", "Source\Readers\LibSVMBinaryReader\LibSVMBinaryReader.vcxproj", "{D667AF32-028A-4A5D-BE19-F46776F0F6B2}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BinaryReader", "Source\Readers\BinaryReader\BinaryReader.vcxproj", "{1D5787D4-52E4-45DB-951B-82F220EE0C6A}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DSSMReader", "Source\Readers\DSSMReader\DSSMReader.vcxproj", "{014DA766-B37B-4581-BC26-963EA5507931}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "HTKMLFReader", "Source\Readers\HTKMLFReader\HTKMLFReader.vcxproj", "{33D2FD22-DEF2-4507-A58A-368F641AEBE5}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LMSequenceReader", "Source\Readers\LMSequenceReader\LMSequenceReader.vcxproj", "{9A2F2441-5972-4EA8-9215-4119FCE0FB68}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LUSequenceReader", "Source\Readers\LUSequenceReader\LUSequenceReader.vcxproj", "{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SparsePCReader", "Source\Readers\SparsePCReader\SparsePCReader.vcxproj", "{CE429AA2-3778-4619-8FD1-49BA3B81197B}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UCIFastReader", "Source\Readers\UCIFastReader\UCIFastReader.vcxproj", "{E6646FFE-3588-4276-8A15-8D65C22711C1}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "EvalTest", "Tests\UnitTests\EvalTest\EvalTest.vcxproj", "{731312A8-6DA3-4841-AFCD-57520BA1BF8E}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MathPerformanceTests", "Tests\UnitTests\MathPerformanceTests\MathPerformanceTests.vcxproj", "{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}"
@ -453,28 +456,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "GPU", "GPU", "{2A1F0FB0-230
Tests\EndToEndTests\LM\RNNLM\GPU\rnnlm.cntk = Tests\EndToEndTests\LM\RNNLM\GPU\rnnlm.cntk
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLU", "SLU", "{BFBC6BE1-C33E-4A80-B8F3-A33410EC00FC}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\SLU\atis.dev.IOB.simple = Tests\EndToEndTests\SLU\atis.dev.IOB.simple
Tests\EndToEndTests\SLU\atis.test.apos.pred.pos.head.IOB.simple = Tests\EndToEndTests\SLU\atis.test.apos.pred.pos.head.IOB.simple
Tests\EndToEndTests\SLU\atis.train.apos.pred.pos.head.IOB.simple = Tests\EndToEndTests\SLU\atis.train.apos.pred.pos.head.IOB.simple
Tests\EndToEndTests\SLU\baseline.linux.cpu.txt = Tests\EndToEndTests\SLU\baseline.linux.cpu.txt
Tests\EndToEndTests\SLU\baseline.linux.gpu.txt = Tests\EndToEndTests\SLU\baseline.linux.gpu.txt
Tests\EndToEndTests\SLU\baseline.windows.cpu.txt = Tests\EndToEndTests\SLU\baseline.windows.cpu.txt
Tests\EndToEndTests\SLU\baseline.windows.gpu.txt = Tests\EndToEndTests\SLU\baseline.windows.gpu.txt
Tests\EndToEndTests\SLU\globals.cntk = Tests\EndToEndTests\SLU\globals.cntk
Tests\EndToEndTests\SLU\input.txt = Tests\EndToEndTests\SLU\input.txt
Tests\EndToEndTests\SLU\inputmap.txt = Tests\EndToEndTests\SLU\inputmap.txt
Tests\EndToEndTests\SLU\lstm.ndl = Tests\EndToEndTests\SLU\lstm.ndl
Tests\EndToEndTests\SLU\lstmNDL.txt = Tests\EndToEndTests\SLU\lstmNDL.txt
Tests\EndToEndTests\SLU\output.txt = Tests\EndToEndTests\SLU\output.txt
Tests\EndToEndTests\SLU\README.txt = Tests\EndToEndTests\SLU\README.txt
Tests\EndToEndTests\SLU\rnnlu.cntk = Tests\EndToEndTests\SLU\rnnlu.cntk
Tests\EndToEndTests\SLU\rnnlu.ndl.cntk = Tests\EndToEndTests\SLU\rnnlu.ndl.cntk
Tests\EndToEndTests\SLU\run-test = Tests\EndToEndTests\SLU\run-test
Tests\EndToEndTests\SLU\testcases.yml = Tests\EndToEndTests\SLU\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "MNIST", "MNIST", "{FA33A61E-95C7-4049-8111-22058CE361A3}"
ProjectSection(SolutionItems) = preProject
Examples\Image\MNIST\README.md = Examples\Image\MNIST\README.md
@ -773,9 +754,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "03_ConvBatchNorm", "03_Conv
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ReaderLib", "Source\Readers\ReaderLib\ReaderLib.vcxproj", "{F0A9637C-20DA-42F0-83D4-23B4704DE602}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Simple2d", "Simple2d", "{D456FA9C-A51C-48B9-87DE-0F7D8A910265}"
EndProject
@ -841,32 +819,31 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "PennTreebank", "PennTreeban
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "RNN", "RNN", "{B72C5B0E-38E8-41BF-91FE-0C1012C7C078}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.debug.cpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.debug.cpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.debug.gpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.debug.gpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.release.cpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.release.cpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.release.gpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.release.gpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.debug.cpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.debug.cpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.debug.gpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.debug.gpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.release.cpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.release.cpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.release.gpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.release.gpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.cpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.cpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.linux.gpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.cpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.cpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.gpu.txt = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\baseline.windows.gpu.txt
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\run-test = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\run-test
Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\testcases.yml = Tests\EndToEndTests\Examples\Text\PennTreebank\RNN\testcases.yml
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKTextFormatReader", "Source\Readers\CNTKTextFormatReader\CNTKTextFormatReader.vcxproj", "{91973E60-A7BE-4C86-8FDB-59C88A0B3715}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ExperimentalHTKMLFReader", "Source\Readers\ExperimentalHTKMLFReader\ExperimentalHTKMLFReader.vcxproj", "{7B7A51ED-AA8E-4660-A805-D50235A02120}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ImageReader", "Source\Readers\ImageReader\ImageReader.vcxproj", "{9BD0A711-0BBD-45B6-B81C-053F03C26CFB}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
{F0A9637C-20DA-42F0-83D4-23B4704DE602} = {F0A9637C-20DA-42F0-83D4-23B4704DE602}
EndProjectSection
EndProject
@ -909,6 +886,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NetworkTests", "Tests\UnitT
ProjectSection(ProjectDependencies) = postProject
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {928ABD1B-4D3B-4017-AEF1-0FA1B4467513}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
{EB2BE26F-6BD4-4274-971F-86D080779DD1} = {EB2BE26F-6BD4-4274-971F-86D080779DD1}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
EndProjectSection
@ -923,8 +901,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CIFAR-10", "CIFAR-10", "{01
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "01_Convolution", "01_Convolution", "{58286327-6742-44C4-A34E-D2583419E55E}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.cpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.cpu.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.gpu.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.windows.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\run-test = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\run-test
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\testcases.yml = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\testcases.yml
@ -954,6 +931,18 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "04_ResNet", "04_ResNet", "{
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\testcases.yml = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Miscellaneous", "Miscellaneous", "{CCB0CD89-DE53-4104-94D3-041D46FC8885}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLU", "SLU", "{B900D033-DC37-45F1-AE52-F35584FD3024}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\baseline.linux.cpu.txt = Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\baseline.linux.cpu.txt
Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\baseline.linux.gpu.txt
Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\baseline.windows.cpu.txt = Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\baseline.windows.cpu.txt
Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\baseline.windows.gpu.txt = Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\baseline.windows.gpu.txt
Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\run-test = Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\run-test
Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\testcases.yml = Tests\EndToEndTests\Examples\Text\Miscellaneous\SLU\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{EC780385-7580-4D15-914B-1D878A295CBC}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Text\SequenceClassification\Config\seqcla.cntk = Tests\EndToEndTests\Text\SequenceClassification\Config\seqcla.cntk
@ -965,6 +954,28 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{D11F76CC-D
Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLU", "SLU", "{181664AC-4C95-4798-A923-09B879215B33}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Text\SLU\atis.dev.IOB.simple = Tests\EndToEndTests\Text\SLU\atis.dev.IOB.simple
Tests\EndToEndTests\Text\SLU\atis.test.apos.pred.pos.head.IOB.simple = Tests\EndToEndTests\Text\SLU\atis.test.apos.pred.pos.head.IOB.simple
Tests\EndToEndTests\Text\SLU\atis.train.apos.pred.pos.head.IOB.simple = Tests\EndToEndTests\Text\SLU\atis.train.apos.pred.pos.head.IOB.simple
Tests\EndToEndTests\Text\SLU\baseline.linux.cpu.txt = Tests\EndToEndTests\Text\SLU\baseline.linux.cpu.txt
Tests\EndToEndTests\Text\SLU\baseline.linux.gpu.txt = Tests\EndToEndTests\Text\SLU\baseline.linux.gpu.txt
Tests\EndToEndTests\Text\SLU\baseline.windows.cpu.txt = Tests\EndToEndTests\Text\SLU\baseline.windows.cpu.txt
Tests\EndToEndTests\Text\SLU\baseline.windows.gpu.txt = Tests\EndToEndTests\Text\SLU\baseline.windows.gpu.txt
Tests\EndToEndTests\Text\SLU\globals.cntk = Tests\EndToEndTests\Text\SLU\globals.cntk
Tests\EndToEndTests\Text\SLU\input.txt = Tests\EndToEndTests\Text\SLU\input.txt
Tests\EndToEndTests\Text\SLU\inputmap.txt = Tests\EndToEndTests\Text\SLU\inputmap.txt
Tests\EndToEndTests\Text\SLU\lstm.ndl = Tests\EndToEndTests\Text\SLU\lstm.ndl
Tests\EndToEndTests\Text\SLU\output.txt = Tests\EndToEndTests\Text\SLU\output.txt
Tests\EndToEndTests\Text\SLU\rnnlu.cntk = Tests\EndToEndTests\Text\SLU\rnnlu.cntk
Tests\EndToEndTests\Text\SLU\rnnlu.ndl.cntk = Tests\EndToEndTests\Text\SLU\rnnlu.ndl.cntk
Tests\EndToEndTests\Text\SLU\run-test = Tests\EndToEndTests\Text\SLU\run-test
Tests\EndToEndTests\Text\SLU\testcases.yml = Tests\EndToEndTests\Text\SLU\testcases.yml
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Common", "Source\Common\Common.vcxproj", "{86883653-8A61-4038-81A0-2379FAE4200A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
@ -1187,6 +1198,14 @@ Global
{CDA96AA3-3252-4978-A0BF-2ACD670823CB}.Release_CpuOnly|x64.Build.0 = Release_CpuOnly|x64
{CDA96AA3-3252-4978-A0BF-2ACD670823CB}.Release|x64.ActiveCfg = Release|x64
{CDA96AA3-3252-4978-A0BF-2ACD670823CB}.Release|x64.Build.0 = Release|x64
{86883653-8A61-4038-81A0-2379FAE4200A}.Debug_CpuOnly|x64.ActiveCfg = Debug_CpuOnly|x64
{86883653-8A61-4038-81A0-2379FAE4200A}.Debug_CpuOnly|x64.Build.0 = Debug_CpuOnly|x64
{86883653-8A61-4038-81A0-2379FAE4200A}.Debug|x64.ActiveCfg = Debug|x64
{86883653-8A61-4038-81A0-2379FAE4200A}.Debug|x64.Build.0 = Debug|x64
{86883653-8A61-4038-81A0-2379FAE4200A}.Release_CpuOnly|x64.ActiveCfg = Release_CpuOnly|x64
{86883653-8A61-4038-81A0-2379FAE4200A}.Release_CpuOnly|x64.Build.0 = Release_CpuOnly|x64
{86883653-8A61-4038-81A0-2379FAE4200A}.Release|x64.ActiveCfg = Release|x64
{86883653-8A61-4038-81A0-2379FAE4200A}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@ -1250,7 +1269,6 @@ Global
{811924DE-2F12-4EA0-BE58-E57BEF3B74D1} = {3BF59CCE-D245-420A-9F17-73CE61E284C2}
{96012801-5187-4FAF-A54E-BF4B73C855F8} = {811924DE-2F12-4EA0-BE58-E57BEF3B74D1}
{2A1F0FB0-2304-4F35-87B3-66230C6E58F0} = {811924DE-2F12-4EA0-BE58-E57BEF3B74D1}
{BFBC6BE1-C33E-4A80-B8F3-A33410EC00FC} = {6E565B48-1923-49CE-9787-9BBB9D96F4C5}
{FA33A61E-95C7-4049-8111-22058CE361A3} = {9BDFA4BE-790E-408F-915B-5979BB5078C6}
{F99E1E80-50D8-421C-AD94-8ED0DF08C355} = {9BDFA4BE-790E-408F-915B-5979BB5078C6}
{ED57E827-B28F-4BEE-BFB7-398EF8D83357} = {FA33A61E-95C7-4049-8111-22058CE361A3}
@ -1314,7 +1332,11 @@ Global
{AB9207B9-B134-4C57-B7ED-F3DCF7B0DC5F} = {0141526B-F257-4574-8CBE-99634726FFCE}
{12FB912C-43F8-40FE-BD7F-B52F589A1EBC} = {0141526B-F257-4574-8CBE-99634726FFCE}
{2BFE4D88-6F32-4701-887A-1DE3D7626DBB} = {0141526B-F257-4574-8CBE-99634726FFCE}
{CCB0CD89-DE53-4104-94D3-041D46FC8885} = {439BE0E0-FABE-403D-BF2C-A41FB8A60616}
{B900D033-DC37-45F1-AE52-F35584FD3024} = {CCB0CD89-DE53-4104-94D3-041D46FC8885}
{EC780385-7580-4D15-914B-1D878A295CBC} = {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}
{D11F76CC-DB6D-4CB4-B3B7-AB139DE2F5FA} = {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}
{181664AC-4C95-4798-A923-09B879215B33} = {8656B71D-E24C-4AC2-8BE4-C07B415A3E15}
{86883653-8A61-4038-81A0-2379FAE4200A} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
EndGlobalSection
EndGlobal

Просмотреть файл

@ -49,7 +49,12 @@ files=(;c:\data.txt;c:\labels.txt)
<td>Parameter Set</td>
<td>
<pre><code>section1=[id=1;size=256] section2=[ subsection=[string="hi";num=5] value=1e-10 array=10:"this is a test":1.25 ]
<pre><code>section1=[id=1;size=256]
section2=[
subsection=[string="hi";num=5]
value=1e-10
array=10:"this is a test":1.25
]
</code></pre>
</td>
@ -66,7 +71,7 @@ files=(;c:\data.txt;c:\labels.txt)
### Organization
In CNTK configuration files Parameter Sets are organized in a hierarchal fashion. The actual data values are not evaluated until a CNTK components requests the value. When a value is requested, by a component, it will search that components section of the configuration file, if the value is not found, it will continue looking in the parent parameter set and continue looking in parent parameter sets until the parameter is found, or the top level of the configuration hierarchy is reached without a match.
In CNTK configuration files Parameter Sets are organized in a hierarchical fashion. The actual data values are not evaluated until a CNTK component requests the value. When a value is requested by a component, CNTK will search that component's section of the configuration file; if the value is not found there, it will continue looking in the parent parameter set, and keep searching parent parameter sets until the parameter is found or the top level of the configuration hierarchy is reached without a match.
### Default Values
@ -107,7 +112,69 @@ This section will go through a sample configuration file that creates a simple D
Here is a simple example of a configuration file:
```
# sample configuration file for CNTK command=mnistTrain:mnistTest #global parameters, all commands use these values unless overridden at a higher level precision=float deviceId=auto #commands used will be appended the stderr name to create a path stderr=c:\cntk\log\cntk # “_mnistTrain_mnistTest.log” would be appended traceLevel=0 # larger values mean more output ndlMacros=C:\cntk\config\DefaultMacros.ndl modelPath=c:\cntk\model\sample.dnn labelMappingFile=c:\cntk\data\mnist\labels.map mnistTrain=[ action=train minibatchSize=32 epochSize=60000 NDLNetworkBuilder=[ networkDescription=c:\cntk\config\sample.ndl run=ndlMacroUse ] SGD=[ #modelPath - moved to root level to share with mnistTest learningRatesPerMB=0.001 maxEpochs=50 ] reader=[ readerType=UCIFastReader file=c:\cntk\data\mnist\mnist_train.txt features=[ dim=784 start=1 ] labels=[ dim=1 start=0 labelDim=10 ] ] ] mnistTest=[ action=eval maxEpochs=1 epochSize=10000 minibatchSize=1000 reader=[ readerType=UCIFastReader randomize=None file=c:\data\mnist\mnist_test.txt features=[ dim=784 start=1 ] labels=[ dim=1 start=0 labelDim=10 ] ] ]
# sample configuration file for CNTK
command=mnistTrain:mnistTest
#global parameters, all commands use these values unless overridden at a higher level
precision=float
deviceId=auto
#commands used will be appended the stderr name to create a path
stderr=c:\cntk\log\cntk # “_mnistTrain_mnistTest.log” would be appended
traceLevel=0 # larger values mean more output
ndlMacros=C:\cntk\config\DefaultMacros.ndl
modelPath=c:\cntk\model\sample.dnn
labelMappingFile=c:\cntk\data\mnist\labels.map
mnistTrain=[
action=train
minibatchSize=32
epochSize=60000
NDLNetworkBuilder=[
networkDescription=c:\cntk\config\sample.ndl
run=ndlMacroUse
]
SGD=[
#modelPath - moved to root level to share with mnistTest
learningRatesPerMB=0.001
maxEpochs=50
]
reader=[
readerType=UCIFastReader
file=c:\cntk\data\mnist\mnist_train.txt
features=[
dim=784
start=1
]
labels=[
dim=1
start=0
labelDim=10
]
]
]
mnistTest=[
action=eval
maxEpochs=1
epochSize=10000
minibatchSize=1000
reader=[
readerType=UCIFastReader
randomize=None
file=c:\data\mnist\mnist_test.txt
features=[
dim=784
start=1
]
labels=[
dim=1
start=0
labelDim=10
]
]
]
```
### Commands and actions
@ -121,7 +188,9 @@ command=mnistTrain:mnistTest
This command instructs CNTK to execute the **mnistTrain** section of the config file, followed by mnistTest. Each of these Config sections has an action associated with it:
```
mnistTrain=[ action=train …
mnistTrain=[
action=train
```
The **mnistTrain** section will execute the **train** action, and the **mnistTest** section will execute **eval**. The names of the sections are arbitrary, but the configuration parameter names must be **command** and **action**.
@ -158,7 +227,9 @@ all | Use all the available GPU devices (will use PTask engine if more than one
Log files are redirection of the normal standard error output. All log information is sent to standard error, and will appear on the console screen unless the stderr parameter is defined, or some other form of user redirection is active. The stderr parameter defines the directory and the prefix for the log file. The suffix is defined by what commands are being run. As an example if “abc” is the setting “abc\_mnistTrain.log” would be the log file name. It is important to note that this file is overwritten on subsequent executions if the stderr parameter and the command being run are identical.
```
#commands used will be appended the stderr name to create a path stderr=c:\cntk\log\cntk # “_mnistTrain_mnistTest.log” would be appended traceLevel=0 # larger values mean more output
#commands used will be appended the stderr name to create a path
stderr=c:\cntk\log\cntk # “_mnistTrain_mnistTest.log” would be appended
traceLevel=0 # larger values mean more output
```
The **traceLevel** parameter is uniformly used by the code in CNTK to specify how much extra output (verbosity) is desired. The default value is 0 (zero) and specifies minimal output; the higher the number, the more output can be expected. Currently 0 (limited output), 1 (medium output), and 2 (verbose output) are the only values supported.
@ -168,7 +239,9 @@ The **traceLevel** parameter is uniformly used by the code in CNTK to specify ho
It is often advantageous to set some values at the top level of the config file. This is because config searches start with the target section and continue the search to higher level sections. If the same parameter is used in multiple sections putting the parameter at a higher level where both sections can share it can be a good idea. In our example the following parameters are used by both the train and the test step:
```
ndlMacros=C:\cntk\config\DefaultMacros.ndl modelPath=c:\cntk\model\sample.dnn labelMappingFile=c:\cntk\data\mnist\labels.map
ndlMacros=C:\cntk\config\DefaultMacros.ndl
modelPath=c:\cntk\model\sample.dnn
labelMappingFile=c:\cntk\data\mnist\labels.map
```
It can also be advantageous to specify parameters that often change all in one area, rather than separated into the sections to which the parameters belong. These commonly modified parameters can even be placed in a separate file if desired. See the layered config files in the reference section for more information.
@ -193,7 +266,34 @@ sub-section | Options | Description
For the Network Builder and the Trainer the existence of the sub-section name tells the train action which component to use. For example, **NDLNetworkBuilder** is specified in our example, so CNTK will use the NDL Network Builder to define the network. Similarly **SGD** is specified, so that trainer will be used. The reader sub-section is a little different, and is always called **reader**, the **readerType** parameter in the sub-section defines which reader will actually be used. Readers are implemented as separate DLLs, and the name of the reader is also the name of the DLL file that will be loaded.
```
mnistTrain=[ action=train minibatchSize=32 epochSize=60000 NDLNetworkBuilder=[ networkDescription=c:\cntk\config\sample.ndl run=ndlMacroUse ] SGD=[ #modelPath - moved to root level to share with mnistTest learningRatesPerMB=0.001 maxEpochs=50 ] reader=[ readerType=UCIFastReader file=c:\cntk\data\mnist\mnist_train.txt features=[ dim=784 start=1 ] labels=[ dim=1 start=0 labelDim=10 ] ] ]
mnistTrain=[
action=train
minibatchSize=32
epochSize=60000
NDLNetworkBuilder=[
networkDescription=c:\cntk\config\sample.ndl
run=ndlMacroUse
]
SGD=[
#modelPath - moved to root level to share with mnistTest
learningRatesPerMB=0.001
maxEpochs=50
]
reader=[
readerType=UCIFastReader
file=c:\cntk\data\mnist\mnist_train.txt
features=[
dim=784
start=1
]
labels=[
dim=1
start=0
labelDim=10
]
]
]
```
The rest of the parameters in the mnistTrain Command Section are briefly explained here, more details about the parameters available for each component are available in the Configuration Reference section of this document.
@ -212,7 +312,11 @@ epochSize=60000
**epochSize** is the number of dataset records that will be processed in a training pass. It is most often set to be the same as the dataset size, but can be smaller or larger than the dataset. It defaults to the size of the dataset if not present in the configuration file. It can also be set to zero for SGD, which has the same meaning.
```
SGD=[ #modelPath - moved to root level to share with mnistTest learningRatesPerMB=0.001 maxEpochs=50 ]
SGD=[
#modelPath - moved to root level to share with mnistTest
learningRatesPerMB=0.001
maxEpochs=50
]
```
**modelPath** is the path to the model file, and will be the name used when a model is completely trained. For epochs prior to the final model a number will be appended to the end signifying the epoch that was saved (i.e. myModel.dnn.5). These intermediate files are important to allow the training process to restart after an interruption. Training will automatically resume at the first non-existent epoch when training is restarted.
@ -246,7 +350,19 @@ readerType=UCIFastReader
Each of the readers uses the same interface into CNTK, and each reader is implemented in a separate DLL. There are many parameters in the reader section that are used by all the different types of readers, and some are specific to a particular reader. Our example reader section is as follows:
```
reader=[ readerType=UCIFastReader file=c:\cntk\data\mnist\mnist_train.txt features=[ dim=784 start=1 ] labels=[ dim=1 start=0 labelDim=10 ] ]
reader=[
readerType=UCIFastReader
file=c:\cntk\data\mnist\mnist_train.txt
features=[
dim=784
start=1
]
labels=[
dim=1
start=0
labelDim=10
]
]
```
The two sub-sections in the reader section identify two different data sets. In our example they are named **features** and **labels**, though any names could be used. These names need to match the names used in the NDL network definition Inputs in our example, so the correct definition is used for each input dataset. Each of these sections for the UCIFastReader have the following parameters:
@ -327,7 +443,17 @@ In addition being able to specify multiple configuration files at the command li
While layered configuration files allow users to reuse configuration files across experiments, this can still be a cumbersome process. For each experiment, a user might have to override several parameters, some of which might be long file paths (eg, stderr, modelPath, file, etc). The “stringize” functionality can make this process much easier. It allows a user to specify configuration like the following:
```
command=SpeechTrain stderr=$Root$\$RunName$.log speechTrain=[ modelPath=$Root$\$RunName$.model SGD=[ reader=[ features=[ type=Real dim=$DataSet1_Dim$ file=$DataSet1_Features$ ]]]]
command=SpeechTrain
stderr=$Root$\$RunName$.log
speechTrain=[
modelPath=$Root$\$RunName$.model
SGD=[
reader=[
features=[
type=Real
dim=$DataSet1_Dim$
file=$DataSet1_Features$
]]]]
```
Here, “Root”,“RunName”, “DataSet1\_Dim”, and “DataSet1\_Features” are variables specified elsewhere in the configuration (at a scope visible from the point at which they are used). When interpreting this configuration file, the parser would replace every string of the form “$VarName$” with the string “VarValue”, where “VarValue” represents the value of the variable called “VarName”. The variable resolution process is recursive; for example, if A=$B$, B=$C$, and C=HelloWorld.txt, then A would be resolved as “HelloWorld.txt”.
@ -350,7 +476,16 @@ If a parameter occurs more than once in a given parameter set, the last occurren
There must be a top-level command parameter, which defines the commands that will be executed in the configuration file. Each command references a Command section of the file, which must contain an action parameter defining the operation that section will perform:
```
command=mnistTrain:mnistTest mnistTrain=[ action=train … ] mnistTest=[ action=eval … ]
command=mnistTrain:mnistTest
mnistTrain=[
action=train
]
mnistTest=[
action=eval
]
```
This snippet will execute the **mnistTrain** section which executes the **train** action, followed by the **mnistTest** section.
@ -525,7 +660,18 @@ Each of the readers uses the same interface into CNTK, and each reader is implem
There are many parameters in the reader section that are used by all the different types of readers, and others are specific to a particular reader. There are sub-sections under the reader section which are used to define the data records to be read. For UCIFastReader these look like:
```
reader=[ readerType=UCIFastReader file=c:\cntk\data\mnist\mnist_train.txt features=[ dim=784 start=1 ] labels=[ dim=1 start=0 labelDim=10 ]
reader=[
readerType=UCIFastReader
file=c:\cntk\data\mnist\mnist_train.txt
features=[
dim=784
start=1
]
labels=[
dim=1
start=0
labelDim=10
]
]
```
@ -654,7 +800,8 @@ For dataset processing the following parameters are used:
SequenceReader is a reader that reads text string. It is mostly often used for language modeling tasks. An example of the text string is as follows:
```
</s> pierre <unk> N years old will join the board as a nonexecutive director nov. N </s> </s> mr. <unk> is chairman of <unk> n.v. the dutch publishing group </s>
</s> pierre <unk> N years old will join the board as a nonexecutive director nov. N </s>
</s> mr. <unk> is chairman of <unk> n.v. the dutch publishing group </s>
```
Symbol &lt;/s&gt; is used to denote both beginning and ending of a sentence. However, this symbol can be specified by beginSequence and endSequence.
@ -686,7 +833,19 @@ A subsection is for input label information.
LUSequenceReader is similar to SequenceReader. It however is used for language understanding tasks which have input and output strings that are different. The content of an example file is listed below
```
BOS O i O want O to O fly O from O boston B-fromloc.city_name at O 1110 B-arrive_time.time in O the O morning B-arrive_time.period_of_day EOS O
BOS O
i O
want O
to O
fly O
from O
boston B-fromloc.city_name
at O
1110 B-arrive_time.time
in O
the O
morning B-arrive_time.period_of_day
EOS O
```
consists of some unique setups as follows:
@ -704,7 +863,8 @@ The LUSequenceReader has some unique setups as follows:
- Wordmap – this specifies a file that maps inputs to other inputs. This is useful if the user wants to map some inputs to unknown symbols. For example:
```
buy buy trans <unk>
buy buy
trans <unk>
```
- File – the corpus file
@ -752,7 +912,67 @@ BinaryWriter is an implementation of a hierarchal file format the mirrors the co
The following is an example of a BinaryWriter definition. Since it is most commonly used as a cache for UCIFastReader, this definition is show as a UCIFastReader cache. The parameters needed for BinaryWriter are in bold type below:
```
# Parameter values for the reader with cache reader=[ # reader to use readerType=UCIFastReader # if writerType is set, we will cache to a binary file # if the binary file exists, we will use it instead of parsing this file writerType=BinaryReader miniBatchMode=Partial randomize=Auto windowSize=10000 #### write definition wfile=c:\data\mnist\mnist_train.bin #wsize - inital size of the file in MB # if calculated size would be bigger, that is used instead wsize=256 #wrecords - number of records we should allocate space for in the file # files cannot be expanded, so this should be large enough. wrecords=60000 features=[ dim=784 start=1 file=c:\data\mnist\mnist_train.txt ### write definition #wsize=200 #wfile=c:\data\mnist\mnist_train_features.bin sectionType=data ] labels=[ dim=1 start=0 file=c:\data\mnist\mnist_train.txt labelMappingFile=c:\temp\labels.txt labelDim=10 labelType=Category #### Write definition #### # sizeof(unsigned) which is the label index type #wsize=10 #wfile=c:\data\mnist\mnist_train_labels.bin elementSize=4 wref=features sectionType=labels mapping=[ #redefine number of records for this section, #since we don't need to save it for each data record wrecords=10 #variable size so use an average string size elementSize=10 sectionType=labelMapping ] category=[ dim=10 #elementSize=sizeof(ElemType) is default sectionType=categoryLabels ] ] ]
# Parameter values for the reader with cache
reader=[
# reader to use
readerType=UCIFastReader
# if writerType is set, we will cache to a binary file
# if the binary file exists, we will use it instead of parsing this file
writerType=BinaryReader
miniBatchMode=Partial
randomize=Auto
windowSize=10000
#### write definition
wfile=c:\data\mnist\mnist_train.bin
    #wsize - initial size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
#wrecords - number of records we should allocate space for in the file
# files cannot be expanded, so this should be large enough.
wrecords=60000
features=[
dim=784
start=1
file=c:\data\mnist\mnist_train.txt
### write definition
#wsize=200
#wfile=c:\data\mnist\mnist_train_features.bin
sectionType=data
]
labels=[
dim=1
start=0
file=c:\data\mnist\mnist_train.txt
labelMappingFile=c:\temp\labels.txt
labelDim=10
labelType=Category
#### Write definition ####
# sizeof(unsigned) which is the label index type
#wsize=10
#wfile=c:\data\mnist\mnist_train_labels.bin
elementSize=4
wref=features
sectionType=labels
mapping=[
#redefine number of records for this section,
#since we don't need to save it for each data record
wrecords=10
#variable size so use an average string size
elementSize=10
sectionType=labelMapping
]
category=[
dim=10
#elementSize=sizeof(ElemType) is default
sectionType=categoryLabels
]
]
]
]
```
@ -865,13 +1085,18 @@ array | ConfigArray |
<li>
<pre><code>{ value value value*# }</code></pre>
<pre><code>{
value
value
value*#
}</code></pre>
</li>
</ul>
</td>
<td>Multiple values in an array are separated by colons :. A value may be repeated multiple times with the * character followed by an integer (the # in the examples). Values in an array may be of any supported type and need not be uniform. The values in a vector can also be surrounded by curly braces {}, braces are required if new lines are used as separators. An alternate separation character can be specified immediately following the opening brace if desired. </td>
<td>Multiple values in an array are separated by colons :. A value may be repeated multiple times with the * character followed by an integer (the # in the examples). Values in an array may be of any supported type and need not be uniform. The values in a vector can also be surrounded by curly braces {}, braces are required if new lines are used as separators. An alternate separation character can be specified immediately following the opening brace if desired.
</td>
</tr>
<!-- DICTIONARY ROW -->
@ -894,12 +1119,18 @@ boolparam</code></pre>
</li>
<li>
<pre><code>[ parameter1=value1 parameter2=value2 boolparam ] </code></pre>
<pre><code>[
parameter1=value1
parameter2=value2
boolparam
]
</code></pre>
</li>
</ul>
</td>
<td>Multiple parameters grouped together in a dictionary. The contents of the dictionary are each named values and can be of different types. Dictionaries can be used to create a configuration hierarchy. When specified on the same line a ; semicolon is used as the default separator. The values can optionally be surrounded by square braces []. Braces are required when using newlines as separators in a config file. An unnamed dictionary is also allowed in the case of an array of dictionaries. An alternate separation character can be specified immediately following the opening brace if desired. </td>
<td>Multiple parameters grouped together in a dictionary. The contents of the dictionary are each named values and can be of different types. Dictionaries can be used to create a configuration hierarchy. When specified on the same line a ; semicolon is used as the default separator. The values can optionally be surrounded by square braces []. Braces are required when using newlines as separators in a config file. An unnamed dictionary is also allowed in the case of an array of dictionaries. An alternate separation character can be specified immediately following the opening brace if desired.
</td>
</tr>
</table>
@ -910,7 +1141,9 @@ boolparam</code></pre>
There are three main classes that are used to access configuration files. *ConfigParameters* and *ConfigArray* contain instances of *ConfigValue*. The main definitions are as follows:
```
class ConfigValue : public std::string class ConfigParameters : public ConfigParser, public ConfigDictionary class ConfigArray:public ConfigParser, public std::vector<ConfigValue>
class ConfigValue : public std::string
class ConfigParameters : public ConfigParser, public ConfigDictionary
class ConfigArray:public ConfigParser, public std::vector<ConfigValue>
```
##### ConfigValue
@ -968,7 +1201,8 @@ To use this method with a ConfigArray, the file can simply contain a list of val
ConfigArray instances can also be converted to argvector&lt;T&gt; instances simply by assigning them. Care should be taken to assign to a local variable, and not just passing as a parameter due to lifetime issues, as follows:
```
ConfigArray configLearnRatesPerMB = config("learningRatesPerMB"); argvector<float> learnRatesPerMB = configLearnRatesPerMB;
ConfigArray configLearnRatesPerMB = config("learningRatesPerMB");
argvector<float> learnRatesPerMB = configLearnRatesPerMB;
```
ConfigParameters and ConfigArray instances are very flexible, but require parsing every time a value is accessed. argvector&lt;T&gt;, on the other hand, parses once and then accesses values as a standard vector.
@ -978,7 +1212,60 @@ ConfigParameters and ConfigArray instances are very flexible, but require parsin
Some sample code that would parse the example configuration file given at the beginning of this document follows. This is a revised version of actual code in CNTK:
```
#include "commandArgUtil.h" // process the command void DoCommand(const ConfigParameters& config) { ConfigArray command = config("command"); for (int i=0; i < command.size(); i++) { //get the configuration parameters that match the command ConfigParameters commandParams=config(command[i]); ConfigArray action = commandParams("action","train"); // determine the action to perform, and do it for (int j=0; j < action.size(); j++) { if (action[j] == "train") DoTrain(commandParams); else if (action[j] == "test" || action[j] == "eval") DoEval(commandParams); else throw runtime_error("unknown action: " + action[j] + " in command set: " + command[i]); } } } void DoTrain(const ConfigParameters& config) { ConfigParameters configSGD=config("SGD"); ConfigParameters readerConfig = config("reader"); IComputationNetBuilder* netBuilder = NULL; ConfigParameters configNDL = config("NDLNetworkBuilder"); netBuilder = (IComputationNetBuilder*)new NDLBuilder(configNDL); DataReader* dataReader = new DataReader(readerConfig); ConfigArray learningRatesPerMBStr = configSGD("learningRatesPerMB", ""); floatargvector learningRatesPerMB = learningRatesPerMBStr; ConfigArray minibatchSize = configSGD("minibatchSize", "256"); size_t epochSize = configSGD("epochSize", "0"); if (epochSize == 0) { epochSize = requestDataSize; } size_t maxEpochs = configSGD("maxEpochs"); wstring modelPath = configSGD("modelPath"); int traceLevel = configSGD("traceLevel", "0"); SGD = sgd(learningRatesPerMB, minibatchSize, epochSize, maxEpochs, modelPath, traceLevel); sgd.Train(netBuilder, dataReader); delete netBuilder; delete dataReader; }
#include "commandArgUtil.h"
// process the command
void DoCommand(const ConfigParameters& config)
{
ConfigArray command = config("command");
for (int i=0; i < command.size(); i++)
{
//get the configuration parameters that match the command
ConfigParameters commandParams=config(command[i]);
ConfigArray action = commandParams("action","train");
// determine the action to perform, and do it
for (int j=0; j < action.size(); j++)
{
if (action[j] == "train")
DoTrain(commandParams);
else if (action[j] == "test" || action[j] == "eval")
DoEval(commandParams);
else
throw runtime_error("unknown action: " + action[j] + " in command set: " + command[i]);
}
}
}
void DoTrain(const ConfigParameters& config)
{
ConfigParameters configSGD=config("SGD");
ConfigParameters readerConfig = config("reader");
IComputationNetBuilder* netBuilder = NULL;
ConfigParameters configNDL = config("NDLNetworkBuilder");
netBuilder = (IComputationNetBuilder*)new NDLBuilder(configNDL);
DataReader* dataReader = new DataReader(readerConfig);
ConfigArray learningRatesPerMBStr = configSGD("learningRatesPerMB", "");
floatargvector learningRatesPerMB = learningRatesPerMBStr;
ConfigArray minibatchSize = configSGD("minibatchSize", "256");
size_t epochSize = configSGD("epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
size_t maxEpochs = configSGD("maxEpochs");
wstring modelPath = configSGD("modelPath");
int traceLevel = configSGD("traceLevel", "0");
SGD = sgd(learningRatesPerMB, minibatchSize, epochSize, maxEpochs, modelPath, traceLevel);
sgd.Train(netBuilder, dataReader);
delete netBuilder;
delete dataReader;
}
```
The code above is very easy to write: you simply declare a config or basic type variable on the stack and assign something from a ConfigParameters class to that variable (i.e. int i = config(”setting”,”default”)). Both parameters with defaults and those without are used in the sample code above. The ConfigValue class takes care of parsing the value to the correct type, and is returned by the config() references above.
@ -994,7 +1281,10 @@ Other possible scenarios are also enabled by using a common interface, for examp
The five readers and one writer provided with CNTK all use these same interfaces and each is housed in its own DLL. CNTK loads the DLL and looks for exported functions that will return the interface of interest. The functions are defined as follows:
```
extern "C" DATAREADER_API void GetReaderF(IDataReader<float>** preader); extern "C" DATAREADER_API void GetReaderD(IDataReader<double>** preader); extern "C" DATAWRITER_API void GetWriterF(IDataWriter<float>** pwriter); extern "C" DATAWRITER_API void GetWriterD(IDataWriter<double>** pwriter);
extern "C" DATAREADER_API void GetReaderF(IDataReader<float>** preader);
extern "C" DATAREADER_API void GetReaderD(IDataReader<double>** preader);
extern "C" DATAWRITER_API void GetWriterF(IDataWriter<float>** pwriter);
extern "C" DATAWRITER_API void GetWriterD(IDataWriter<double>** pwriter);
```
each reader or writer DLL exports the appropriate functions, and will return the interface when called. The following sections defined the interfaces:
@ -1002,7 +1292,31 @@ each reader or writer DLL exports the appropriate functions, and will return the
#### Reader Interface
```
/ Data Reader interface // implemented by DataReader and underlying classes template<class ElemType> class DATAREADER_API IDataReader { public: typedef std::string LabelType; typedef unsigned LabelIdType; virtual void Init(const ConfigParameters& config) = 0; virtual void Destroy() = 0; virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize) = 0; virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices) = 0; virtual const std::map<typename LabelIdType, typename LabelType>& GetLabelMapping(const std::wstring& sectionName) = 0; virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<typename LabelIdType, typename LabelType>& labelMapping) = 0; virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart) = 0; virtual bool DataEnd(EndDataType endDataType) = 0; // Recursive network specific methods virtual size_t NumberSlicesInEachRecurrentIter() = 0; virtual void SetNbrSlicesEachRecurrentIter(const size_t) = 0; virtual void ReloadLabels() = 0; virtual void SaveLabels() = 0; virtual void SetSentenceEndInBatch(vector<size_t> &sentenceEnd)=0; };
// Data Reader interface
// implemented by DataReader and underlying classes
template<class ElemType>
class DATAREADER_API IDataReader
{
public:
typedef std::string LabelType;
typedef unsigned LabelIdType;
virtual void Init(const ConfigParameters& config) = 0;
virtual void Destroy() = 0;
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize) = 0;
virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices) = 0;
virtual const std::map<typename LabelIdType, typename LabelType>& GetLabelMapping(const std::wstring& sectionName) = 0;
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<typename LabelIdType, typename LabelType>& labelMapping) = 0;
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart) = 0;
virtual bool DataEnd(EndDataType endDataType) = 0;
// Recursive network specific methods
virtual size_t NumberSlicesInEachRecurrentIter() = 0;
virtual void SetNbrSlicesEachRecurrentIter(const size_t) = 0;
virtual void ReloadLabels() = 0;
virtual void SaveLabels() = 0;
virtual void SetSentenceEndInBatch(vector<size_t> &sentenceEnd)=0;
};
```
The methods are as follows:
@ -1068,7 +1382,21 @@ The methods are as follows:
#### Writer Interface
```
// Data Writer interface // implemented by some DataWriters template<class ElemType> class DATAWRITER_API IDataWriter { public: typedef std::string LabelType; typedef unsigned LabelIdType; virtual void Init(const ConfigParameters& config) = 0; virtual void Destroy() = 0; virtual void GetSections(std::map<std::wstring, SectionType, nocase_compare>& sections) = 0; virtual bool SaveData(size_t recordStart, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t numRecords, size_t datasetSize, size_t byteVariableSized) = 0; virtual void SaveMapping(std::wstring saveId, const std::map<typename LabelIdType, typename LabelType>& labelMapping) = 0; };
// Data Writer interface
// implemented by some DataWriters
template<class ElemType>
class DATAWRITER_API IDataWriter
{
public:
typedef std::string LabelType;
typedef unsigned LabelIdType;
virtual void Init(const ConfigParameters& config) = 0;
virtual void Destroy() = 0;
virtual void GetSections(std::map<std::wstring, SectionType, nocase_compare>& sections) = 0;
virtual bool SaveData(size_t recordStart, const std::map<std::wstring, void*, nocase_compare>& matrices, size_t numRecords, size_t datasetSize, size_t byteVariableSized) = 0;
virtual void SaveMapping(std::wstring saveId, const std::map<typename LabelIdType, typename LabelType>& labelMapping) = 0;
};
```
The methods are as follows:
@ -1111,22 +1439,63 @@ The library uses BLAS libraries from NVidia for the GPU (CuBLAS) and AMD for the
### PTask support
PTask is a library used in CTNK to enable multiple GPU computation on a single machine. PTask uses the concept of a “Tasks organized in a filter graph. It allows fully asynchronous operation of the tasks, each only depending on inputs being available to execute. PTask distributes the tasks across the available hardware and handles data transfers.
PTask is a library used in CNTK to enable multiple GPU computation on a single machine. PTask uses the concept of “Tasks” organized in a filter graph. It allows fully asynchronous operation of the tasks, each only depending on inputs being available to execute. PTask distributes the tasks across the available hardware and handles data transfers.
CTNK is organized in a different fashion with Computation Nodes. However, each node has two methods that do all the computation work: EvaluateThisNode() and ComputeInputPartial(), which can be used as the “Tasks”. However, since Tasks can be executed asynchronously, they need to be stateless. To enable these methods as task a static version of each method that takes all inputs and outputs as parameters are created. The class methods simply call these “Task” functions with the class variables for their implementation.
CNTK is organized in a different fashion with Computation Nodes. However, each node has two methods that do all the computation work: EvaluateThisNode() and ComputeInputPartial(), which can be used as the “Tasks”. Since Tasks can be executed asynchronously, they need to be stateless. To enable these methods as tasks, a static version of each method that takes all inputs and outputs as parameters is created. The class methods simply call these “Task” functions with the class variables for their implementation.
The PTaskGraphBuilder component takes a computation network and transforms it into a filter graph. In order to do this work it requires the parameter description for each of the tasks. Since C++ does not have a reflection mechanism as is available in C\# and some other languages, a class method has been introduced to ComputationNode to provide this information. The method GetPTaskDescriptor() provides this information to PTaskGraphBuilder so it can build the graph.
The following is an example of a GetPTaskDescriptor() implementation. This function returns a TaskDescriptor class containing all the parameter and other information necessary to build the filter graph for a particular node. This node is the “TimesNode” and does a matrix multiply. The implementations of the two important member functions are as follows:
```
virtual void EvaluateThisNode() { EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues()); } virtual void ComputeInputPartial(const size_t inputIndex) { if (inputIndex > 1) throw std::invalid_argument("Times operation only takes two inputs."); if (inputIndex == 0) //left derivative { ComputeInputPartialLeft(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues()); } else //right derivative { ComputeInputPartialRight(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues()); } }
virtual void EvaluateThisNode()
{
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
}
virtual void ComputeInputPartial(const size_t inputIndex)
{
if (inputIndex > 1)
throw std::invalid_argument("Times operation only takes two inputs.");
if (inputIndex == 0) //left derivative
{
ComputeInputPartialLeft(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues());
}
else //right derivative
{
ComputeInputPartialRight(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues());
}
}
```
The GetPTaskDescriptor() method describes the necessary parameter information for each method. Each node has a FunctionValue matrix and a GradientValue matrix associated with it, and the descriptor methods identify which values are needed, and whether they come from the current node or one of its inputs, as follows:
```
// GetTaskDescriptor - Get a task descriptor for this node // taskType - task type we are generating a task for virtual TaskDescriptor<ElemType>* GetPTaskDescriptor(TaskType taskType, size_t inputIndex=0) const { TaskDescriptor<ElemType>* descriptor = new TaskDescriptor<ElemType>(this, taskType, inputIndex); switch(taskType) { case taskComputeInputPartial: descriptor->FunctionParam(1-inputIndex, paramOptionsInput); descriptor->GradientParam(inputIndex, paramOptionsInput | paramOptionsOutput | paramOptionsInitialize); descriptor->GradientParam(); descriptor->SetFunction( (inputIndex?(FARPROC)ComputeInputPartialRight:(FARPROC)ComputeInputPartialLeft)); break; case taskEvaluate: descriptor->FunctionParam(); descriptor->FunctionParam(0, paramOptionsInput); descriptor->FunctionParam(1, paramOptionsInput); descriptor->SetFunction((FARPROC)EvaluateThisNodeS); break; default: assert(false); throw std::logic_error("Unsupported task requested"); } return descriptor; }
// GetTaskDescriptor - Get a task descriptor for this node
// taskType - task type we are generating a task for
virtual TaskDescriptor<ElemType>* GetPTaskDescriptor(TaskType taskType, size_t inputIndex=0) const
{
TaskDescriptor<ElemType>* descriptor = new TaskDescriptor<ElemType>(this, taskType, inputIndex);
switch(taskType)
{
case taskComputeInputPartial:
descriptor->FunctionParam(1-inputIndex, paramOptionsInput);
descriptor->GradientParam(inputIndex, paramOptionsInput | paramOptionsOutput | paramOptionsInitialize);
descriptor->GradientParam();
descriptor->SetFunction( (inputIndex?(FARPROC)ComputeInputPartialRight:(FARPROC)ComputeInputPartialLeft));
break;
case taskEvaluate:
descriptor->FunctionParam();
descriptor->FunctionParam(0, paramOptionsInput);
descriptor->FunctionParam(1, paramOptionsInput);
descriptor->SetFunction((FARPROC)EvaluateThisNodeS);
break;
default:
assert(false);
throw std::logic_error("Unsupported task requested");
}
return descriptor;
}
```
For the Evaluate method, the first parameter is an output to the FunctionValue matrix of the current node.
@ -1138,7 +1507,8 @@ descriptor->FunctionParam();
The default value for this method is “current node, output” so no parameters are needed. The next two parameters are inputs and are the function values from the two inputs:
```
descriptor->FunctionParam(0, paramOptionsInput); descriptor->FunctionParam(1, paramOptionsInput);
descriptor->FunctionParam(0, paramOptionsInput);
descriptor->FunctionParam(1, paramOptionsInput);
```
The last call passes a pointer to the task function:
@ -1150,7 +1520,8 @@ descriptor->SetFunction((FARPROC)EvaluateThisNodeS);
and the descriptor is complete. The two ComputeInputPartial task function parameters are very similar. Depending on the inputIndex, the values are switched. The first parameter is an input of the function value of one of the inputs, and the second is an output value to the gradient matrix of the other input:
```
descriptor->FunctionParam(1-inputIndex, paramOptionsInput); descriptor->GradientParam(inputIndex, paramOptionsInput | paramOptionsOutput | paramOptionsInitialize);
descriptor->FunctionParam(1-inputIndex, paramOptionsInput);
descriptor->GradientParam(inputIndex, paramOptionsInput | paramOptionsOutput | paramOptionsInitialize);
```
The second parameter is interesting because it is required to retain its value from one call to the next; this is done in a filter graph by having a parameter be input and output at the same time, meaning it updates itself. There is a clear distinction between values that need to be maintained and those that are transient in a filter graph, and this idiom is how we instruct PTaskGraphBuilder to retain the value. The Initialize option is also necessary so on the first iteration the matrix will be cleared out (zeros).
@ -1170,7 +1541,12 @@ descriptor->SetFunction((inputIndex ? (FARPROC)ComputeInputPartialRight : (FARPR
For reference the three task functions are as follows:
```
static void WINAPI ComputeInputPartialLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) static void WINAPI ComputeInputPartialRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1) ```
static void WINAPI ComputeInputPartialLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
static void WINAPI ComputeInputPartialRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
```
### NDL classes and processing

Просмотреть файл

@ -5,7 +5,6 @@ command = Add_Operator_Constant
precision = "float"
traceLevel = 1
outputNodeNames = AddResult
#######################################
# NETWORK CONFIG #

Просмотреть файл

@ -5,7 +5,6 @@ command = Add_Operator_Constant
precision = "float"
traceLevel = 1
outputNodeNames = AddResult
#######################################
# NETWORK CONFIG #

Просмотреть файл

@ -4,4 +4,4 @@ ConfigDir=$WorkDir$\config
NdlDir=$ConfigDir$
ExpDir=c:\temp\exp\atis
OutDir=$ExpDir$\output
DeviceNumber=0
DeviceId=0

Просмотреть файл

@ -1,21 +1,16 @@
# configuration file for CNTK ATIS for language understanding tasks
stderr=$ExpDir$\ATIS\logd
precision="float"
deviceId = $DeviceId$
command=LSTM:LSTMTest
type=float
deviceId=0 #"auto" # use -1 for CPU. Note: due to a bug, testing only works on CPU
traceLevel=1
LSTM=[
action=train
traceLevel=1
makeMode=true
# output model path
modelPath=$ExpDir$\cntkdebug.dnn
modelPath=$ExpDir$/cntkdebug.dnn
# uncomment NDLNetworkBuilder to use NDL
# need to comment out SimpleNetworkBuilder section
@ -100,7 +95,7 @@ LSTM=[
# writerType=BinaryReader
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
wfile=$ExpDir$/sequenceSentence.bin
#wsize - inital size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
@ -112,8 +107,8 @@ LSTM=[
windowSize=10000
unk="<unk>"
wordmap=$DataDir$\inputmap.txt
file=$DataDir$\atis.train.apos.pred.pos.head.IOB.simple
wordmap=$DataDir$/inputmap.txt
file=$DataDir$/atis.train.apos.pred.pos.head.IOB.simple
#additional features sections
#for now store as expanded category data (including label in)
@ -137,14 +132,14 @@ LSTM=[
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabels.txt
labelMappingFile=$ExpDir$/sentenceLabels.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
# input word list
token=$DataDir$\input.txt
token=$DataDir$/input.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
@ -169,9 +164,9 @@ LSTM=[
labelType=Category
# output token list
token=$DataDir$\output.txt
token=$DataDir$/output.txt
labelMappingFile=$ExpDir$\sentenceLabels.out.txt
labelMappingFile=$ExpDir$/sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
sectionType=labels
@ -200,7 +195,7 @@ LSTM=[
equalLength = false
#### write definition
wfile=$ExpDir$\sequenceSentence.valid.bin
wfile=$ExpDir$/sequenceSentence.valid.bin
#wsize - inital size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
@ -212,8 +207,8 @@ LSTM=[
windowSize=10000
unk="<unk>"
wordmap=$DataDir$\inputmap.txt
file=$DataDir$\atis.dev.IOB.simple
wordmap=$DataDir$/inputmap.txt
file=$DataDir$/atis.dev.IOB.simple
#additional features sections
#for now store as expanded category data (including label in)
@ -237,13 +232,13 @@ LSTM=[
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabels.in.txt
labelMappingFile=$ExpDir$/sentenceLabels.in.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
token=$DataDir$\input.txt
token=$DataDir$/input.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
@ -267,10 +262,10 @@ LSTM=[
dim=1
labelType=Category
token=$DataDir$\output.txt
token=$DataDir$/output.txt
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabels.out.txt
labelMappingFile=$ExpDir$/sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
@ -299,13 +294,12 @@ LSTM=[
LSTMTest=[
action=write
traceLevel=1
epochSize=4430000
# which is 886 * 5000
#recurrentLayer=1
defaultHiddenActivity=0.1
modelPath=$ExpDir$\cntkdebug.dnn
modelPath=$ExpDir$/cntkdebug.dnn
outputNodeNames=outputs:labels
@ -315,15 +309,15 @@ LSTMTest=[
randomize=None
wordContext=0:1:2
unk="<unk>"
wordmap=$DataDir$\inputmap.txt
file=$DataDir$\atis.test.apos.pred.pos.head.IOB.simple
wordmap=$DataDir$/inputmap.txt
file=$DataDir$/atis.test.apos.pred.pos.head.IOB.simple
# if writerType is set, we will cache to a binary file
# if the binary file exists, we will use it instead of parsing this file
# writerType=BinaryReader
#### write definition
wfile=$ExpDir$\sequenceSentence.bin
wfile=$ExpDir$/sequenceSentence.bin
#wsize - inital size of the file in MB
# if calculated size would be bigger, that is used instead
wsize=256
@ -355,13 +349,13 @@ LSTMTest=[
# vocabulary size
labelDim=10000
labelMappingFile=$ExpDir$\sentenceLabels.txt
labelMappingFile=$ExpDir$/sentenceLabels.txt
labelType=Category
beginSequence="BOS"
endSequence="EOS"
usewordmap=true
token=$DataDir$\input.txt
token=$DataDir$/input.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
@ -387,12 +381,12 @@ LSTMTest=[
beginSequence="BOS"
endSequence="EOS"
token=$DataDir$\output.txt
token=$DataDir$/output.txt
# vocabulary size
labelDim=127
labelMappingFile=$ExpDir$\sentenceLabels.out.txt
labelMappingFile=$ExpDir$/sentenceLabels.out.txt
#### Write definition ####
# sizeof(unsigned) which is the label index type
elementSize=4
@ -416,13 +410,13 @@ LSTMTest=[
writerType=LUSequenceWriter
outputs=[
file=$OutDir$\output.rec.txt
token=$DataDir$\output.txt
file=$OutDir$/output.rec.txt
token=$DataDir$/output.txt
]
labels=[
file=$OutDir$\output.lbl.txt
token=$DataDir$\output.txt
file=$OutDir$/output.lbl.txt
token=$DataDir$/output.txt
]
]
]

Просмотреть файл

@ -1,11 +1,14 @@
# configFile=$(SolutionDir)Examples/Text/PennTreebank/Config/S2SAutoEncoder.cntk RunRootDir=$(SolutionDir)Examples/Text/PennTreebank DeviceId=-1 makeMode=false
# configFile=$(SolutionDir)Examples/Text/PennTreebank/Config/S2SAutoEncoder.cntk RunRootDir=$(SolutionDir)g2p makeMode=false
# TASK
# configFile=$(SolutionDir)Examples/Text/PennTreebank/Config/S2SAutoEncoder.cntk RunRootDir=$(SolutionDir)Examples/Text/PennTreebank
# configFile=$(SolutionDir)Examples/Text/PennTreebank/Config/S2SAutoEncoder.cntk RunRootDir=$(SolutionDir)g2p
####################
# WORK IN PROGRESS #
# WORK IN PROGRESS #
# WORK IN PROGRESS #
####################
makeMode = false
# Command line to run in debugger:
# configFile=$(SolutionDir)Examples/Text/PennTreebank/Config/S2SAutoEncoder.cntk RunRootDir=$(SolutionDir)Examples/Text/PennTreebank train=[SGD=[maxEpochs=1]] confVocabSize=1000 DeviceId=-1 makeMode=false
@ -19,18 +22,15 @@ ExpRootDir = "$RunRootDir$"
#ExpId = _run
deviceId = 1
#ExpId = 68-$deviceId$-s2sae-bigmodel
ExpId = 06-$deviceId$-g2p
#ExpId = 05-3-g2p # for decoding a different model
#ExpId = 41-$deviceId$-s2sae # TASK
ExpId = 14-$deviceId$-g2p
#ExpId = 13-5-g2p # for decoding a different model
# directories
ExpDir = "$ExpRootDir$/$ExpId$"
ModelDir = "$ExpDir$/Models"
stderr = $ExpDir$/S2SAutoEncoder.log7
# Append this for small set:
# train=[epochSize=2048]] trainFile=ptb.small.train.txt validFile=ptb.small.valid.txt testFile=ptb.small.test.txt
stderr = $ExpDir$/S2SAutoEncoder.log3
# It implements a sequence-to-sequence based auto-encoder.
# It encodes an entire sentence into a flat vector, and tries to regenerate it.
@ -38,18 +38,23 @@ stderr = $ExpDir$/S2SAutoEncoder.log7
command = writeWordAndClassInfo:train:test:write
#command = write
#command = dump
precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/S2SAutoEncoder.dnn"
decodeModelPath = "$modelPath$.13" # epoch to decode can be appended here
beamDepth = 1 # 0=predict; 1=greedy; >1=beam
decodeModelPath = "$modelPath$.35" # epoch to decode can be appended here TASK
beamDepth = 3 # 0=predict; 1=greedy; >1=beam
decodeOutputPath = "$decodeModelPath$.b$beamDepth$"
dumpModelPath = "$modelPath$.2" # model to dump if needed
#confVocabSize = 10000
#confClassSize = 50
#maxLength = 84
#isAutoEncoder=true
#
#trainFile = "ptb.train.txt"
##trainFile = "ptb.small.train.txt"
#validFile = "ptb.valid.txt"
@ -59,13 +64,16 @@ decodeOutputPath = "$decodeModelPath$.b$beamDepth$"
##testFile = "ptb.small.train.txt" # test on train, to see whether model makes sense at all
#startSymbol = "</s>"
confVocabSize = 69 #10000
confClassSize = 0 #50
confVocabSize = 69
confClassSize = 0
maxLength = 20
isAutoEncoder=false
trainFile = "g014b2b.train-dev-20-21.bsf.joint"
#trainFile = "g014b2b.train-dev-1-21.bsf.joint" # small one for debugging
validFile = "g014b2b.train-dev-1-21.bsf.joint"
testFile = "g014b2b.test.bsf.joint"
#testFile = "g014b2b.test.bsf.joint.masked"
startSymbol = "<s>"
#######################################
@ -74,34 +82,36 @@ startSymbol = "<s>"
BrainScriptNetworkBuilder = (new ComputationNetwork [
# TODO: move this somewhere shared
enableTracing = true
traceFrequency = 1000
traceFrequency = 100
tracingLabelMappingFile = "$ModelDir$/vocab.wl"
beamDepth=3 // for above Trace macros only, need to clean that up
include "S2SLib.bs"
beamDepth=3 // for above Trace macros only
# import general config options from outside config values
vocabDim = $confVocabSize$
nbrClass = $confClassSize$
isAutoencoder = false # input is only one sequence, meant to reproduce itself
isAutoencoder = $isAutoEncoder$ # input is only one sequence, meant to reproduce itself
attentionSpan = $maxLength$ # 0 to disable. We only support fixed-size attention windows for now. 0 means no attention; exactly 20 is needed for the g2p CMUDict task
useStabilizer = true
useEncoder = true # if false, this becomes a regular RNN
useNYUStyle = false # if true use thought vector for all inputs, NYU-style
attentionSpan = 20 # we only support fixed-size attention windows for now. 0 means no attention; exactly 20 is needed for the g2p CMUDict task
useBidirectionalEncoder = false
# import some namespaces
# import some names
Parameters = BS.Parameters
Constants = BS.Constants
Sequences = BS.Sequences
Loop = BS.Loop
Boolean = BS.Boolean
RecurrentLSTMP = BS.RNNs.RecurrentLSTMP
# dimensions
embeddingDim = $confVocabSize$ # 300
hiddenDim = 750 # 512 # 1024 # 200 --TODO: Kaisheng used 500
maxLayer = 2 # 1 # 0
hiddenDim = 512 #420 #768 # 1024 # 200 --TODO: Kaisheng used 500
attentionDim = 128 # dim of attention projection
maxLayer = 1 # 0
encoderDims[i:0..maxLayer] = hiddenDim # this defines the number of hidden layers in each
decoderDims[i:0..maxLayer] = hiddenDim # both are one LSTM layer only for now
@ -110,34 +120,35 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
#input = SparseInput(vocabDim, tag='feature'); # BUGBUG: Slice() not working for sparse, need to extend TensorView
input = Input(vocabDim, tag='feature');
# get out input and label data
streams = [
rawInput = input
out = if isAutoencoder
then [
# for an auto-encoder, both are the same
input = rawInput
labels = rawInput
]
else [
# we encode input and label as a single input; this splits it into two
separatorRow = 2 # row index of separator symbokl
isSeparator = RowSlice (separatorRow, 1, rawInput) # cut out the separator as a flag
inInput = Boolean.Or (FutureValue (1, inInput , defaultHiddenActivation=0), isSeparator) # flag sequence: word is input...
inLabels = Boolean.Or (PastValue (1, inLabels, defaultHiddenActivation=0), isSeparator) # ...or labels
input = Sequences.Gather (inInput, rawInput) # use flags to split raw input into input and labels
labels = Sequences.Gather (inLabels, rawInput) # (both have different lengths)
]
].out
rawInput = input
out = if isAutoencoder
then [
# for an auto-encoder, both are the same
input = rawInput
labels = rawInput
]
else [
# we encode input and label as a single input; this splits it into two
# This dance will become unnecessary once the new Reader API is fully hooked in.
separatorRow = 2 # row index of separator symbokl
isSeparator = RowSlice (separatorRow, 1, rawInput) # cut out the separator as a flag
inInput = Boolean.Or (FutureValue (1, inInput , defaultHiddenActivation=0), isSeparator) # flag sequence: word is input...
inLabels = Boolean.Or (PastValue (1, inLabels, defaultHiddenActivation=0), isSeparator) # ...or labels
input = Sequences.Gather (inInput, rawInput) # use flags to split raw input into input and labels
labels = Sequences.Gather (inLabels, rawInput) # (both have different lengths)
]
].out
# helpers
# helpers --TODO: move to CNTK.core.bs
First (x) = Slice (0, 1, x, axis=-1)
Last (x) = Slice (-1, 0, x, axis=-1)
# strip separators
# TODO: find out which one is the correct one
#inputSequence = Slice (0, -1, streams.input, axis=-1) # e.g. <s> A B C # TODO: process </s> as well, to trigger the thought vector
inputSequence = streams.input # e.g. <s> A B C </s>
labelSequence = Slice (1, 0, streams.labels, axis=-1) # e.g. A B C </s>
inputSequence = Pass ( streams.input ) # e.g. <s> A B C </s>
labelSequence = Pass (Slice (1, 0, streams.labels, axis=-1)) # e.g. A B C </s>
# ^^ use Pass() to make these easily accessible in MEL, e.g. for decoding
# embeddings --as long as we cannot read multiple sequences, we got one embedding
# Note: Embeddings are linear, so better stabilize. We really should use BatchNorm.
@ -151,49 +162,61 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
labelSentenceStart = First (streams.labels)
labelSentenceStartEmbedded = EmbedLabels (labelSentenceStart)
RecurrentLSTMPWithAttentionWindow2 (inputDim/*x.dim*/, outputDim/*h.dim*/, cellDim/*c.dim*/, x, projectedAttentionWindowBroadcast, attentionDim, attentionSpan, enableSelfStabilization=false) =
RecurrentLSTMPWithAttentionWindow2 (inputDim1/*x.dim*/, outputDim/*h.dim*/, cellDim1/*c.dim*/, x, projectedAttentionWindowBroadcast, attentionDim, attentionSpan, previousHook=BS.RNNs.PreviousHC, enableSelfStabilization=false) =
[
prevState =
[
h = Loop.Previous (lstmState.h) # hidden state(t-1)
c = Loop.Previous (lstmState.c) # cell(t-1)
]
prevState = previousHook (lstmState)
# compute additional hidden state from attention
W(x) = Parameters.WeightParam (attentionDim, outputDim) * Parameters.Stabilize (x, enabled=useStabilizer)
projectedH = W (prevState.h) # [cellDim]
tanHOut = Tanh (projectedAttentionWindowBroadcast.value + projectedH) # [attentionDim x attentionSpan]
v(x) = Parameters.WeightParam (1, attentionDim) * Parameters.Stabilize (x, enabled=useStabilizer) # [1 x attentionDim]
W(x) = TraceDense( Parameters.WeightParam (attentionDim, outputDim) , 'Wdec') * Parameters.Stabilize (x, enabled=false/*useStabilizer*/)
projectedH = W (prevState.h) # [outputDim] // [outputDim x D]
tanHOut = Tanh (TraceDense( projectedAttentionWindowBroadcast.projectedValue, 'hencp') + TraceDense ( projectedH, 'hdecp')) # [attentionDim x attentionSpan]
# ^^ [attDim x 1 x attSpan] + [attDim x D] -> [attDim x D x attSpan]
v(x) = TraceDenseTransposed( Parameters.WeightParam (1, attentionDim) ,'v') * Parameters.Stabilize (x, enabled=useStabilizer) # [1 x attentionDim]
u = v (tanHOut) # [1 x attentionSpan]
# [1 x D x attSpan]
uValid = u + Log (projectedAttentionWindowBroadcast.valid) # [1 x attentionSpan]
attentionWeights = Softmax (uValid) # [1 x attentionSpan]
weightedAttentionWindow = projectedAttentionWindowBroadcast.value .* attentionWeights # [attentionDim x attentionSpan]
weightedAttentionAverage = weightedAttentionWindow * BS.Constants.OnesTensor (attentionSpan) # [attentionDim]
# [1 x D x attSpan] + [1 x 1 x attSpan] -> [1 x D x attSpan]
attentionWeights = TraceDense( Softmax (uValid) ,'weights') # [1 x attentionSpan]
# [1 x D x attSpan] BUGBUG, needs to keep Softmax denoms separate over D
weightedAttentionWindow = projectedAttentionWindowBroadcast.value .* attentionWeights # [encoderHiddenDim x attentionSpan]
# [encDim x 1 x attSpan] .* [1 x D x attSpan] -> [encDim x D x attSpan] BUGBUG, needs to keep Softmax denoms separate over D
weightedAttentionAverage = weightedAttentionWindow * BS.Constants.OnesTensor (attentionSpan) # [encoderHiddenDim]
# [encDim x D]
# feed both to LSTM as a single agumented input, so that we can reuse the existing LSTM component
augmentedX = RowStack (weightedAttentionAverage : x)
enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
lstmState = BS.RNNs.LSTMP (attentionDim + inputDim, outputDim, cellDim, augmentedX, prevState, enableSelfStabilization=enableSelfStabilization1)
].lstmState // that's the value we return
RecurrentLSTMP2WithInitialState (inputDim, outputDim, cellDim, x, initialState, enableSelfStabilization=false) =
[
prevState =
[
isFirst = Loop.IsFirst (initialState.h)
h = Boolean.If (isFirst, initialState.h, Loop.Previous (lstmState.h)) // hidden state(t-1)
c = Boolean.If (isFirst, initialState.c, Loop.Previous (lstmState.c)) // cell(t-1)
]
enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
lstmState = BS.RNNs.LSTMP (inputDim, outputDim, cellDim, x, prevState, enableSelfStabilization=enableSelfStabilization1)
lstmState = BS.RNNs.LSTMP (outputDim, cellDim=cellDim1, augmentedX, inputDim=projectedAttentionWindowBroadcast.dim + inputDim1, prevState, enableSelfStabilization=enableSelfStabilization1)
].lstmState // that's the value we return
# encoder (processes inputEmbedded)
encoder = BS.RNNs.RecurrentLSTMP2Stack (inputEmbedded, embeddingDim, encoderDims, encoderDims, enableSelfStabilization=useStabilizer)
encoderOutputLayer = Length (encoderDims)-1
encoderOutput = encoder[encoderOutputLayer]
encoder =
if useBidirectionalEncoder
then
[
encoderOutputLayer = Length (encoderDims)-1
forwardEncoder = BS.RNNs.RecurrentLSTMP2Stack (encoderDims, /*cellDims=encoderDims,*/ inputEmbedded, inputDim=embeddingDim, enableSelfStabilization=useStabilizer)
NextHC (lstmState) = [
h = Loop.Next (lstmState.h) // hidden state(t-1)
c = Loop.Next (lstmState.c) // cell(t-1)
]
backwardEncoder = BS.RNNs.RecurrentLSTMP2Stack (encoderDims, /*encoderDims,*/ inputEmbedded, inputDim=embeddingDim, previousHook=NextHC, enableSelfStabilization=useStabilizer)
output = [
h = RowStack (forwardEncoder[encoderOutputLayer].h : backwardEncoder[encoderOutputLayer].h)
c = RowStack (forwardEncoder[encoderOutputLayer].c : backwardEncoder[encoderOutputLayer].c)
dim = 2 * encoderDims[encoderOutputLayer]
]
#dim = 2 * encoderDims[encoderOutputLayer]
]
else
[
encoderOutputLayer = Length (encoderDims)-1
encoder = BS.RNNs.RecurrentLSTMP2Stack (inputEmbedded, embeddingDim, encoderDims, encoderDims, enableSelfStabilization=useStabilizer)
output = encoder[encoderOutputLayer]
#dim = encoderDims[encoderOutputLayer]
]
encoderOutput = encoder.output # TODO: remove .output indirection, no longer needed
# that last frame should be fed as an additional input to every decoder step
# Three ways of passing encoder state:
@ -204,22 +227,25 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
thoughtVector = [
h = Last (encoderOutput.h)
c = Last (encoderOutput.c)
dim = encoder.output.dim
]
thoughtVectorDim = encoderDims[encoderOutputLayer]
thoughtVectorDim = thoughtVector.dim
thoughtVectorPadded = [ # padded with zeroes until end of target sequence
h = Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.h)
c = Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.c)
dim = thoughtVector.dim
]
# attention (fixed rolling window)
attentionWindow = Sequences.PastValueWindow (attentionSpan, encoderOutput.h)
attentionDim = thoughtVectorDim
attentionWindow = Sequences.PastValueWindow (attentionSpan, encoderOutput.h, axis=2) # BUGBUG: We need axis=3 for beam search. Track this down.
projectedAttentionWindowBroadcast = [
W(x) = Parameters.WeightParam (attentionDim, thoughtVectorDim) * Parameters.Stabilize (x, enabled=useStabilizer)
W(x) = TraceDense ( Parameters.WeightParam (attentionDim, thoughtVector.dim), 'Wenc') * Parameters.Stabilize (x, enabled=false/*useStabilizer*/)
#B = Parameters.BiasParam (vocabDim) # no bias in attention
value = Sequences.BroadcastSequenceAs (labelsEmbedded, W (attentionWindow.value)) # apply the projection columnwise to the attentionWindow tensor
valid = Sequences.BroadcastSequenceAs (labelsEmbedded, attentionWindow.valid)
value = Sequences.BroadcastSequenceAs (labelsEmbedded, attentionWindow.value)
projectedValue = Sequences.BroadcastSequenceAs (labelsEmbedded, W (attentionWindow.value)) # apply the projection columnwise to the attentionWindow tensor
valid = Sequences.BroadcastSequenceAs (labelsEmbedded, attentionWindow.valid)
dim = thoughtVector.dim
]
# NYU style: expand h to all, drop c
@ -227,37 +253,73 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
thoughtVectorEverywhere = Boolean.If (Loop.IsFirst (thoughtVectorPadded.h), # if first entry
/*then*/ thoughtVectorPadded.h, # then copy that
/*else*/ Loop.Previous (thoughtVectorEverywhere)) # else just propagate to the front
# TODO: create an indexSequence that contains all zeroes, basically broadcast a single-frame sequence across another sequence length
# TODO: use BroadcastSequenceAs()
# decoder
# NYU style:
# The decoder starts with hidden state 0
# and takes as input [thoughtVectorEverywhere; previous word].
delayedDecoderFeedback = Loop.PreviousOrDefault (defaultValue=labelSentenceStartEmbedded, labelsEmbedded)
# we bake into the LSTMs to multiply h and c with beamSearchReorderHook, which we will patch in decoding
# ReorderTopN (past_h_or_c) = Times (TraceState (past_h_or_c, 'past'), TraceDense (tokens.from, 'backp'))
beamSearchReorderHook = Pass (Constants.One)
# helper functions to delay h and c with possibility to later hook in a different matrix
PreviousHCFromThoughtVectorWithReorderingHook (lstmState) = [ # with thought vector and beam-search hook
isFirst = Loop.IsFirst (initialState.h)
# BUGBUG: Should be thoughtVector, but Scatter() can't expand from inside a loop
h = Boolean.If (isFirst, thoughtVectorPadded.h, Loop.Previous (lstmState.h/* * beamSearchReorderHook*/)) // hidden state(t-1)
c = Boolean.If (isFirst, thoughtVectorPadded.c, Loop.Previous (lstmState.c/* * beamSearchReorderHook*/)) // cell(t-1)
]
PreviousHCWithReorderingHook (lstmState) = [
h = Loop.Previous (lstmState.h/* * beamSearchReorderHook*/) // hidden state(t-1)
c = Loop.Previous (lstmState.c/* * beamSearchReorderHook*/) // cell(t-1)
]
decoderHistoryFromGroundTruth = labelsEmbedded # decoder input for training is ground truth...
decoderHistoryFromOutput = Pass (EmbedLabels (Hardmax (z)), tag='output') # ...but for testing, it's the output. Make an 'output' to make it a root that is kept
# during training, we use ground truth. For decoding, we will rewire decoderHistoryHook = decoderHistoryFromOutput
decoderHistoryHook = Pass (decoderHistoryFromGroundTruth) # this gets redirected in decoding to feed back decoding output instead
PreviousOrDefault1 (x, defaultValue=Constant (0)) = # a delay node with initial value --TODO: merge the two, then do in C++
[
flags = Loop.IsFirst (defaultValue/*x*/)
out = Boolean.If (flags,
/*then*/ defaultValue,
/*else*/ Loop.Previous (x))
].out
labelSentenceStartEmbeddedScattered = BS.Sequences.Scatter (Loop.IsFirst (labelSequence), labelSentenceStartEmbedded) # unfortunately needed presently
decoderInput = Pass (PreviousOrDefault1 (defaultValue=labelSentenceStartEmbeddedScattered, decoderHistoryHook))
decoderInputDim = embeddingDim #labelsEmbedded.dim
decoderInputDim = labelsEmbedded.dim #embeddingDim
decoderInput = Pass (delayedDecoderFeedback)
decoderOutputLayer = Length (decoderDims)-1
decoder[i:0..decoderOutputLayer] =
if i == 0
then if useEncoder && useNYUStyle then BS.RNNs.RecurrentLSTMP2 (thoughtVectorDim + decoderInputDim, decoderDims[i], decoderDims[i],
RowStack (thoughtVectorEverywhere : decoderInput),
then if useEncoder && useNYUStyle then BS.RNNs.RecurrentLSTMP2 (decoderDims[i],// cellDim=decoderDims[i],
RowStack (thoughtVectorEverywhere : decoderInput), inputDim=thoughtVectorDim + decoderInputDim,
previousHook=PreviousHCWithReorderingHook,
enableSelfStabilization=useStabilizer)
else if useEncoder && attentionSpan > 0 then RecurrentLSTMPWithAttentionWindow2 (thoughtVectorDim + decoderInputDim, decoderDims[i], decoderDims[i],
RowStack (thoughtVectorEverywhere : decoderInput),
else if useEncoder && attentionSpan > 0 then RecurrentLSTMPWithAttentionWindow2 (/*thoughtVectorDim + //<-BUGBUG*/ decoderInputDim, decoderDims[i], decoderDims[i],
/*RowStack (thoughtVectorEverywhere : //<-BUGBUG)*/ (decoderInput),
projectedAttentionWindowBroadcast, attentionDim, attentionSpan,
previousHook=PreviousHCWithReorderingHook,
enableSelfStabilization=useStabilizer)
else RecurrentLSTMP2WithInitialState (decoderInputDim, decoderDims[i], decoderDims[i],
decoderInput,
thoughtVectorPadded, # BUGBUG: Should be thoughtVector, but Scatter() can't expand from inside a loop
enableSelfStabilization=useStabilizer)
else BS.RNNs.RecurrentLSTMP2 (decoderDims[i-1], decoderDims[i], decoderDims[i],
decoder[i-1].h,
else BS.RNNs.RecurrentLSTMP2 (decoderDims[i],// cellDim=decoderDims[i],
decoderInput, inputDim=decoderInputDim,
previousHook=PreviousHCFromThoughtVectorWithReorderingHook, # Previous() function with thought vector as initial state
enableSelfStabilization=useStabilizer)
else BS.RNNs.RecurrentLSTMP2 (decoderDims[i],// cellDim=decoderDims[i],
decoder[i-1].h, inputDim=decoderDims[i-1] /*TODO: decoder[i-1].dim*/,
previousHook=PreviousHCWithReorderingHook,
enableSelfStabilization=useStabilizer)
#decoderDim = decoderDims[decoderOutputLayer]
decoderOutput = decoder[decoderOutputLayer].h
decoderDim = decoderOutput.dim
#decoderDim = decoderOutput.dim
decoderDim = decoderDims[decoderOutputLayer]
# and add a softmax layer on top
@ -280,7 +342,7 @@ reader = [
#randomize = "auto" # gets ignored
readerType = LMSequenceReader
mode = "softmax"
mode = "softmax" # TODO: find out what this means
nbruttsineachrecurrentiter = 0 # 0 means auto-fill given minibatch size
cacheBlockSize = 100000000 # read block size. This value is large enough to load entire corpus at once
@ -445,12 +507,12 @@ train = [
learningRatesPerSample = 0.007*2:0.0035 #0.01 #0.005 # 0.01
momentumAsTimeConstant = 2500
gradientClippingWithTruncation = true # TODO: clip and truncate? What is the difference?
clippingThresholdPerSample = 15.0
clippingThresholdPerSample = 1 # 15.0 # visibly impacts objectives, but not final result, so keep it for safety
maxEpochs = 50
numMBsToShowResult = 100
firstMBsToShowResult = 10
gradUpdateType = "none" # FSAdaGrad?
loadBestModel = true
loadBestModel = false # true # broken for some models (rereading overwrites something that got set by validation)
# tracing (enable these for debugging)
#traceNodeNamesReal = labelsEmbedded:decoderInput:"decoder[0].lstmState._privateInnards.ht":z.Plus_left.Times_right.result:z:ce
@ -475,6 +537,18 @@ train = [
]
]
#######################################
# DUMP CONFIG #
#######################################
# dumps the model, specifically the learnable parameters
dump = [
action = "dumpnode"
modelPath = "$dumpModelPath$"
outputFile = "$dumpModelPath$.txt"
]
#######################################
# TEST CONFIG #
#######################################
@ -639,7 +713,7 @@ write = [
# reduce back to a single column
topHyps = TraceSparse (topPathScores * OnesTensor (1 : topN), 'topHyps')
inputsOut = Pass (model.inputSequence)
inputsOut = Pass (model.streams_out_input/*inputSequence*/)
labelsOut = Pass (TraceOneHot (model.labelSequence, 'labels'))
decodeOut = Pass (TraceOneHot (top1, 'out'))
topNOut = Pass (topHyps)
@ -653,9 +727,9 @@ write = [
PreviousOrDefault1 (x, defaultValue=Constant (0)) = # a delay node with initial value --TODO: merge the two, then do in C++
[
flags = IsFirst (defaultValue/*x*/)
out = If (flags,
/*then*/ defaultValue,
/*else*/ Previous (x))
out = BS.Boolean.If (flags,
/*then*/ defaultValue,
/*else*/ Previous (x))
].out
labelSentenceStart = modelAsTrained.labelSentenceStart_out # _ is a hack
@ -667,7 +741,7 @@ write = [
delayedDecoderFeedback = TraceDense (/*Loop.*/PreviousOrDefault1 (defaultValue=labelSentenceStartEmbeddedScattered, TraceDense (decoderFeedback, 'lemb')) , 'prev lemb')
greedyDecodingModel = BS.Network.Edit (modelAsTrained,
BS.Network.Editing.ReplaceLinksToNode (modelAsTrained.delayedDecoderFeedback, delayedDecoderFeedback),
BS.Network.Editing.ReplaceLinksToNode (modelAsTrained.decoderInput/*delayedDecoderFeedback*/, delayedDecoderFeedback),
modelAsTrained.z/*dummy for now since cannot pass empty set*/)
# beam search of width 'beamDepth'
@ -679,7 +753,7 @@ write = [
# decoder[0].prevState.h.elseVal = PastValue (decoder[0].lstmState._privateInnards.ht) : [512 x 1 x labelSequence.h.out.h.indexSequence.h.indexSequence.h] -> [512 x 1 x labelSequence.h.out.h.indexSequence.h.indexSequence.h]
# decoder[0].prevState.c.elseVal = PastValue (decoder[0].lstmState._privateInnards.ct) : [512 x 1 x labelSequence.h.out.h.indexSequence.h.indexSequence.h] -> [512 x 1 x labelSequence.h.out.h.indexSequence.h.indexSequence.h]
hiddenDim = modelAsTrained.delayedDecoderFeedback.dim
hiddenDim = modelAsTrained.decoderFeedback.dim
embeddingDim = modelAsTrained.decoderOutputEmbedded.dim
vocabSize = modelAsTrained.z.dim
@ -702,19 +776,55 @@ write = [
# - traceback is a right-to-left recurrence
# - output best hypo conditioned on the path (it is already known)
propagationEdits[i:0..8] = // TODO: implement and use { } syntax TODO: VV elseVal only for non-NYU?
# attention:
# tanHOut = Tanh (TraceDense( projectedAttentionWindowBroadcast.projectedValue, 'hencp') + TraceDense ( projectedH, 'hdecp')) # [attentionDim x attentionSpan]
# decoder[0].tanHOut.z = Plus (decoder[0].tanHOut.z.PlusArgs[0], decoder[0].tanHOut.z.PlusArgs[1]) : [128 x 20 x WhereNodeAxis1], [128] -> [128 x 20 x WhereNodeAxis1]
# patch PlusArgs[0]
# uValid = u + Log (projectedAttentionWindowBroadcast.valid) # [1 x attentionSpan]
# decoder[0].uValid = Plus (decoder[0].u, decoder[0].uValid.PlusArgs[1]) : [1 x 20 x WhereNodeAxis1], [1 x 20 x WhereNodeAxis1] -> [1 x 20 x WhereNodeAxis1]
# patch PlusArgs[1]
# weightedAttentionWindow = projectedAttentionWindowBroadcast.value .* attentionWeights # [encoderHiddenDim x attentionSpan]
# decoder[0].weightedAttentionWindow = ElementTimes (projectedAttentionWindowBroadcast.value.out, decoder[0].attentionWeights) : [512 x 20 x WhereNodeAxis1], [1 x 20 x WhereNodeAxis1] -> [512 x 20 x WhereNodeAxis1]
# patch ElementTimesArgs[0]
# each:
# node -> SplitDimension (node, /*axis=*/, 1 /*->0:1*/)
# e.g.
# [512 x 20 x *] -> [(0:1) x 20 x *hereNodeAxis13] -> [512 x 1 x 20 x *]
# decoder[0].weightedAttentionAverage = Times (decoder[0].weightedAttentionWindow, decoder[0].weightedAttentionAverage.TimesArgs[1]) : [512 x 1 x 20 x WhereNodeAxis11], [20] -> [512] FAILED
# change to outputRank=2
# attentionWeights = TraceDense( Softmax (uValid) ,'weights') # [1 x attentionSpan]
# decoder[0].attentionWeights.h = Softmax (decoder[0].uValid) : [1 x 3 x 20 x WhereNodeAxis21] -> [1 x 3 x 20 x WhereNodeAxis21]
# path SoftmaxArgs[0] to be column-wise over axis 3
ColumnwiseSoftmax (axis=1, z) = [ n = TraceDense( Softmax (z), 'smz') ; axis1 = axis ; d = TraceDense( ReducePlus (axis=axis1, n), 'denom') ; p = TraceDense( n .* Reciprocal (d), 'p') ].p
#Columnwise (f, beamDepth, z) = # TODO: Takes LogSoftmax over axis=1. it is more tricky to do this over arbitrary axes
#[
# cols[d:0..beamDepth-1] = f (Slice (d, d+1, z, axis=2) /*[:,d]*/ )
# out = Splice (cols, axis=2)
#].out
InjectDepth (node) = SplitDimension (node, /*axis=*/1, 1 /*->0:1*/)
propagationEdits[i:0..13] = // TODO: implement and use { } syntax TODO: VV elseVal only for non-NYU?
# non-NYU:
if i == 0 then (node => if node.name == 'decoder[0].prevState.h.elseVal' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
else if i == 1 then (node => if node.name == 'decoder[0].prevState.c.elseVal' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
if i == 0 then (node => if node.name == 'decoder[0].prevState.h.elseVal' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
else if i == 1 then (node => if node.name == 'decoder[0].prevState.c.elseVal' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
# NYU:
else if i == 2 then (node => if node.name == 'decoder[0].prevState.h' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
else if i == 3 then (node => if node.name == 'decoder[0].prevState.c' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
else if i == 2 then (node => if node.name == 'decoder[0].prevState.h' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
else if i == 3 then (node => if node.name == 'decoder[0].prevState.c' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
# all:
else if i == 4 then (node => if node.name == 'decoder[1].prevState.h' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
else if i == 5 then (node => if node.name == 'decoder[1].prevState.c' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
else if i == 6 then (node => if node.name == 'decoder[2].prevState.h' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
else if i == 7 then (node => if node.name == 'decoder[2].prevState.c' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
else BS.Network.Editing.ReplaceLinksToNode (modelAsTrained.delayedDecoderFeedback, delayedDecoderFeedback)
else if i == 4 then (node => if node.name == 'decoder[1].prevState.h' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
else if i == 5 then (node => if node.name == 'decoder[1].prevState.c' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
else if i == 6 then (node => if node.name == 'decoder[2].prevState.h' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node) # inject reshuffling of hypotheses
else if i == 7 then (node => if node.name == 'decoder[2].prevState.c' then TraceState (Previous (ReorderTopN (node.PastValueArgs[0])), 'propagated') else node)
# attention:
else if i == 8 then (node => if node.name != 'decoder[0].tanHOut.z' then node else InjectDepth (node.PlusArgs[0]) + node.PlusArgs[1])
else if i == 9 then (node => if node.name != 'decoder[0].uValid' then node else node.PlusArgs[0] + InjectDepth (node.PlusArgs[1]))
else if i == 10 then (node => if node.name != 'decoder[0].weightedAttentionWindow' then node else InjectDepth (node.ElementTimesArgs[0]) .* node.ElementTimesArgs[1])
else if i == 11 then (node => if node.name != 'decoder[0].weightedAttentionAverage' then node else Times (node.TimesArgs[0], node.TimesArgs[1], outputRank=2))
else if i == 12 then (node => if node.name != 'decoder[0].attentionWeights.h' then node else ColumnwiseSoftmax (axis=3, node.SoftmaxArgs[0]))
else BS.Network.Editing.ReplaceLinksToNode (modelAsTrained.decoderInput/*delayedDecoderFeedback*/, delayedDecoderFeedback)
# decoderFeedback must be updated to take actual decoder output
@ -808,7 +918,7 @@ write = [
# +-----+
# tokens.word:
#tokens.word = ReduceSum (axis=2, topPaths) # TODO: add an axis parameter to SumColumnElements()
#tokens.word = ReducePlus (axis=2, topPaths) # TODO: add an axis parameter to SumColumnElements()
# +-+
# |0|
# |0|-+

Просмотреть файл

@ -233,7 +233,7 @@ READER_SRC =\
$(SOURCEDIR)/Readers/ReaderLib/ChunkRandomizer.cpp \
$(SOURCEDIR)/Readers/ReaderLib/SequenceRandomizer.cpp \
$(SOURCEDIR)/Readers/ReaderLib/SequencePacker.cpp \
$(SOURCEDIR)/Readers/ReaderLib/BpttPacker.cpp \
$(SOURCEDIR)/Readers/ReaderLib/TruncatedBpttPacker.cpp \
$(SOURCEDIR)/Readers/ReaderLib/PackerBase.cpp \
$(SOURCEDIR)/Readers/ReaderLib/FramePacker.cpp \

Просмотреть файл

@ -1,6 +1,9 @@
# CNTK
## Latest news
*2016-04-25.* V 1.1 Binary release
CNTK v.1.1 binaries are on the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases/tag/v1.1)
*2016-04-12.* CNTK is available as [Azure Virtual Machines](https://github.com/Microsoft/CNTK/wiki/CNTK-on-Azure) and [Docker Containers](https://github.com/Microsoft/CNTK/wiki/CNTK-Docker-Containers)
*2016-04-12.* Added support for ND convolution and ND pooling and CPU support for `cudnn` layout in convolution, pooling and batch normalization nodes.
@ -8,10 +11,6 @@ Read [documentation](https://github.com/Microsoft/CNTK/wiki/Full-NDL-Function-Re
*2016-04-05.* CUDA7.5 support for Windows Build: Windows project files have been updated to automatically utilize CUDA 7.5 if present
## March 2016
*2016-03-24.* New Text Reader (CNTKTextFormatReader) is available
Read description here https://github.com/Microsoft/CNTK/wiki/CNTKTextFormat-Reader
See [all news](https://github.com/Microsoft/CNTK/wiki/News).
## What is CNTK

Просмотреть файл

@ -24,36 +24,17 @@
<RootNamespace>CNTK</RootNamespace>
<ProjectName>ActionsLib</ProjectName>
</PropertyGroup>
<PropertyGroup Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
</PropertyGroup>
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="$(DebugBuild)" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="$(DebugBuild)">
<LinkIncremental>true</LinkIncremental>
<PreBuildEventUseInBuild>false</PreBuildEventUseInBuild>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)">
<LinkIncremental>false</LinkIncremental>
<ExecutablePath>$(ExecutablePath)</ExecutablePath>
<PreBuildEventUseInBuild>false</PreBuildEventUseInBuild>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\CNTK;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
@ -66,57 +47,34 @@
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\multiverso;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
</ItemDefinitionGroup>
<ItemDefinitionGroup>
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_SCL_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<OpenMPSupport>true</OpenMPSupport>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
<PreprocessorDefinitions>WIN32;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib; kernel32.lib; user32.lib; shell32.lib; %(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>Math.dll; nvml.dll</DelayLoadDLLs>
<StackReserveSize>100000000</StackReserveSize>
<AdditionalDependencies>Math.lib;%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>Math.dll</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<PreprocessorDefinitions>_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
<TreatWarningAsError>true</TreatWarningAsError>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib; kernel32.lib; user32.lib; shell32.lib; %(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<DelayLoadDLLs>Math.dll; nvml.dll</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(CpuOnlyBuild)">
<ClCompile>
<PreprocessorDefinitions>%(PreprocessorDefinitions);CPUONLY</PreprocessorDefinitions>
</ClCompile>
<Link>
<DelayLoadDLLs>Math.dll</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(GpuBuild)">
@ -125,9 +83,10 @@
</ClCompile>
<Link>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaLibPath)</AdditionalLibraryDirectories>
<DelayLoadDLLs>%(DelayLoadDLLs);nvml.dll</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Command>if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Command>if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)"</Command>
<Message>Copying NVidia GDK extension DLL to target folder</Message>
</PostBuildEvent>
</ItemDefinitionGroup>
@ -148,13 +107,6 @@
<ClInclude Include="SimpleNetworkBuilder.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Common\File.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\fileutil.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\TimerUtility.cpp" />
<ClCompile Include="NetworkDescriptionLanguage.cpp" />
<ClCompile Include="NetworkFactory.cpp" />
<ClCompile Include="SimpleNetworkBuilder.cpp" />
@ -166,4 +118,4 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
</Project>

Просмотреть файл

@ -9,16 +9,18 @@
#include "NetworkDescriptionLanguage.h"
#include "NDLNetworkBuilder.h"
#include "ConvolutionalNodes.h"
#include "DeprecatedNodes.h"
#include "EvaluationNodes.h"
#include "InputAndParamNodes.h"
#include "LinearAlgebraNodes.h"
#include "NonlinearityNodes.h"
#include "ConvolutionalNodes.h"
#include "RecurrentNodes.h"
#include "PreComputeNodes.h"
#include "ReshapingNodes.h"
#include "RecurrentNodes.h"
#include "SpecialPurposeNodes.h"
#include "TrainingNodes.h"
#include "PreComputeNodes.h"
#include "EvaluationNodes.h"
using namespace std;
@ -156,6 +158,7 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
else if (EqualInsensitive(nodeType, OperationNameOf(CRFNode), L"CRF")) ret = true;
#endif
else if (EqualInsensitive(nodeType, OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode), L"CBCEWithSM")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ClipNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ConvolutionNode), L"Convolve")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(PoolingNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(CosDistanceNode), L"CosDist")) ret = true;
@ -170,11 +173,13 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
else if (EqualInsensitive(nodeType, OperationNameOf(ElementTimesNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ErrorPredictionNode), L"ClassificationError")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ExpNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(FloorNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(FutureValueNode))) ret = true;
#ifdef COMING_SOON
else if (EqualInsensitive(nodeType, OperationNameOf(GMMLogLikelihoodNode), L"GMMLL")) ret = true;
#endif
else if (EqualInsensitive(nodeType, OperationNameOf(HardmaxNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(IfNode), L"If")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(InputValue), L"Input")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(InvStdDevNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(KhatriRaoProductNode), L"ColumnwiseCrossProduct")) ret = true;

Просмотреть файл

@ -510,11 +510,11 @@ template <typename ElemType>
void DoTopologyPlot(const ConfigParameters& config)
{
wstring modelPath = config(L"modelPath");
wstring outputDotFile = config(L"outputDotFile"); // filename for the dot language output, if not specified, %modelpath%.dot will be used
wstring outputFile = config(L"outputFile"); // filename for the rendered topology plot
wstring outputDotFile = config(L"outputDotFile", L""); // filename for the dot language output, if not specified, %modelpath%.dot will be used
wstring outputFile = config(L"outputFile", L""); // filename for the rendered topology plot
// this can be empty, in that case no rendering will be done
// or if this is set, renderCmd must be set, so CNTK will call re
wstring renderCmd = config(L"renderCmd"); // if this option is set, then CNTK will call the render to convert the outdotFile to a graph
wstring renderCmd = config(L"renderCmd", L""); // if this option is set, then CNTK will call the render to convert the outdotFile to a graph
// e.g. "d:\Tools\graphviz\bin\dot.exe -Tpng -x <IN> -o<OUT>"
// where <IN> and <OUT> are two special placeholders
@ -544,7 +544,8 @@ void DoTopologyPlot(const ConfigParameters& config)
renderCmd = msra::strfun::ReplaceAll(renderCmd, wstring(L"<OUT>"), outputFile);
}
if (!renderCmd.empty())
{
fprintf(stderr, "Executing third-party tool for rendering dot:\n%ls\n", renderCmd.c_str());
#ifdef __unix__
auto rc = system(msra::strfun::utf8(renderCmd).c_str());
@ -552,7 +553,8 @@ void DoTopologyPlot(const ConfigParameters& config)
#else
_wsystem(renderCmd.c_str());
#endif
fprintf(stderr, "Done.\n");
}
fprintf(stderr, "Done.\n");
}
template void DoTopologyPlot<float>(const ConfigParameters& config);

Просмотреть файл

@ -15,10 +15,7 @@ Format(value, format) = new StringFunction [ what = 'Format' ; arg = value ; how
Replace(s, from, to) = new StringFunction [ what = 'Replace' ; arg = s ; replacewhat = from ; withwhat = to ]
Substr(s, begin, num) = new StringFunction [ what = 'Substr' ; arg = s ; pos = begin ; chars = num ]
Chr(c) = new StringFunction [ what = 'Chr' ; arg = c ]
Floor(x) = new NumericFunction [ what = 'Floor' ; arg = x ]
Length(x) = new NumericFunction [ what = 'Length' ; arg = x ]
Ceil(x) = -Floor(-x)
Round(x) = Floor(x+0.5)
Sign(x) = if x > 0 then 1 else if x < 0 then -1 else 0
Min(a,b) = if a < b then a else b
Max(a,b) = if a > b then a else b
@ -29,7 +26,7 @@ IsSameObject(a,b) = new CompareFunction [ what = 'IsSameObject' ; args = (a : b)
# ComputationNodes
##############################################################################
LearnableParameter(rows, cols, learningRateMultiplier = 1.0, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (rows : cols) ] /*plus the function args*/ ]
LearnableParameter (outputDim, inputDim, learningRateMultiplier = 1.0, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (outputDim : inputDim) ] /*plus the function args*/ ]
Parameter = LearnableParameter // deprecated
# TODO: make Parameter take tensor dims?
ParameterTensor(dims, learningRateMultiplier = 1.0, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]
@ -41,6 +38,7 @@ SparseInput(dims, dynamicAxis='', tag='feature') = new ComputationNode [ operati
ImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'InputValue' ; isImage = true /*plus the function args*/ ]
SparseImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; isImage = true /*plus the function args*/ ]
EnvironmentInput(propertyName, tag='') = new ComputationNode [ operation = 'EnvironmentInput' /*plus the function args*/ ]
# TODO: make 'dims' the first parameter, think ConstantTensor<dims> (val)
ConstantTensor(val, dims, tag='') = ParameterTensor(dims, learningRateMultiplier = 0, init = 'fixedValue', value = val)
Constant(val, rows = 1, cols = 1, tag='') = Parameter(rows, cols, learningRateMultiplier = 0, init = 'fixedValue', value = val)
PastValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]
@ -94,7 +92,9 @@ Delay = PastValue
BatchNormalization(input, scale, bias, runMean, runInvStdDev, spatial, normalizationTimeConstant = 0, blendTimeConstant = 0, epsilon = 0.00001, useCntkEngine = true, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'BatchNormalization' ; inputs = (input : scale : bias : runMean : runInvStdDev) /*plus the function args*/ ]
Abs(x, tag='') = new ComputationNode [ operation = 'Abs' ; inputs = x /*plus the function args*/ ]
Ceil(x, tag='') = Negate(Floor(Negate(x)), tag=tag)
ClassBasedCrossEntropyWithSoftmax(labelClassDescriptorVectorSequence, mainInputInfo, mainWeight, classLogProbsBeforeSoftmax, tag='') = new ComputationNode [ operation = 'ClassBasedCrossEntropyWithSoftmax' ; inputs = (labelClassDescriptorVectorSequence : mainInputInfo : mainWeight : classLogProbsBeforeSoftmax) /*plus the function args*/ ]
Clip(minValue, maxValue, x, tag='') = new ComputationNode [ operation = 'Clip' ; inputs = (minValue : maxValue : x) /* plus the function args*/ ]
ColumnElementTimes(aVectorSequence, anotherVectorSequence, tag='') = new ComputationNode [ operation = 'ColumnElementTimes' ; inputs = (aVectorSequence : anotherVectorSequence) /*plus the function args*/ ]
// TODO: ColumnElementTimes = ElementTimes
CosDistance(aVectorSequence, anotherVectorSequence, tag='') = new ComputationNode [ operation = 'CosDistance' ; inputs = (aVectorSequence : anotherVectorSequence) /*plus the function args*/ ]
@ -109,11 +109,13 @@ ElementTimes(aMatrix, anotherMatrix, tag='') = new ComputationNode [ operation =
ElementDivide(aMatrix, anotherMatrix, tag='') = ElementTimes(aMatrix, Reciprocal(anotherMatrix), tag=tag)
ErrorPrediction(labelVectorSequence, outVectorSequence, tag='') = new ComputationNode [ operation = 'ErrorPrediction' ; inputs = (labelVectorSequence : outVectorSequence) /*plus the function args*/ ]
Exp(x, tag='') = new ComputationNode [ operation = 'Exp' ; inputs = x /*plus the function args*/ ]
Floor(x, tag='') = new ComputationNode [ operation = 'Floor' ; inputs = x /*plus the function args*/ ]
GatherPacked(indexSequence, sourceData, tag='') = new ComputationNode [ operation = 'GatherPacked' ; inputs = (indexSequence : sourceData) /*plus the function args*/ ]
GMMLogLikelihood(unnormalizedPriorVector, meansAsRows, logStdDevAsRows, dataVectorSequence, tag='') = new ComputationNode [ operation = 'GMMLogLikelihood' ; inputs = (unnormalizedPriorVector : meansAsRows : logStdDevAsRows : dataVectorSequence) /*plus the function args*/ ]
InvStdDev(dataVectorSequence, tag='') = new ComputationNode [ operation = 'InvStdDev' ; inputs = dataVectorSequence /*plus the function args*/ ]
KhatriRaoProduct(leftMatrix, rightMatrix, tag='') = new ComputationNode [ operation = 'KhatriRaoProduct' ; inputs = (leftMatrix : rightMatrix) /*plus the function args*/ ]
Log(x, tag='') = new ComputationNode [ operation = 'Log' ; inputs = x /*plus the function args*/ ]
LogPlus(leftMatrix, rightMatrix, tag='') = new ComputationNode [ operation = 'LogPlus' ; inputs = (leftMatrix : rightMatrix) /*plus the function args*/ ]
LogSoftmax(z, tag='') = new ComputationNode [ operation = 'LogSoftmax' ; inputs = z /*plus the function args*/ ]
MatrixL1Reg(matrix, tag='') = new ComputationNode [ operation = 'MatrixL1Reg' ; inputs = matrix /*plus the function args*/ ]
MatrixL2Reg(matrix, tag='') = new ComputationNode [ operation = 'MatrixL2Reg' ; inputs = matrix /*plus the function args*/ ]
@ -127,6 +129,12 @@ PerDimMeanVarNormalization(dataVectorSequence, meanVector, invStdDevVector, tag=
Plus(leftMatrix, rightMatrix, tag='') = new ComputationNode [ operation = 'Plus' ; inputs = (leftMatrix : rightMatrix) /*plus the function args*/ ]
Reciprocal(z, tag='') = new ComputationNode [ operation = 'Reciprocal' ; inputs = z /*plus the function args*/ ]
RectifiedLinear(z, tag='') = new ComputationNode [ operation = 'RectifiedLinear' ; inputs = z /*plus the function args*/ ]
ReducePlus (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "Plus" /*plus the function args*/ ]
#ReduceLogPlus (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "LogPlus" /*plus the function args*/ ]
#ReduceMean (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "Mean" /*plus the function args*/ ]
#ReduceMax (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "Max" /*plus the function args*/ ]
#ReduceMin (z, axis=0, tag='') = new ComputationNode [ operation = 'ReduceElements' ; inputs = z ; reductionOp = "Min" /*plus the function args*/ ]
Round(x, tag='') = Floor(Plus(x, ConstantTensor(0.5, (1))), tag=tag)
Scale(scalarScalingFactor, matrix, tag='') = new ComputationNode [ operation = 'Scale' ; inputs = (scalarScalingFactor : matrix) /*plus the function args*/ ]
// TODO: Scale = ElementTimes
ScatterPacked(cond, indexSequence, sourceData, tag='') = new ComputationNode [ operation = 'ScatterPacked' ; inputs = (cond : indexSequence : sourceData) /*plus the function args*/ ]
@ -136,8 +144,7 @@ Softmax(z, tag='') = new ComputationNode [ operation = 'Softmax' ; inputs = z /*
Hardmax(z, tag='') = new ComputationNode [ operation = 'Hardmax' ; inputs = z /*plus the function args*/ ]
Sqrt(z, tag='') = new ComputationNode [ operation = 'Sqrt' ; inputs = z /*plus the function args*/ ]
SquareError(aMatrix, anotherMatrix, tag='') = new ComputationNode [ operation = 'SquareError' ; inputs = (aMatrix : anotherMatrix) /*plus the function args*/ ]
SumColumnElements(z, tag='') = new ComputationNode [ operation = 'SumColumnElements' ; inputs = z /*plus the function args*/ ]
# ^^ TODO: Rename to SumElements? ReduceSum without axis?
SumColumnElements(z, tag='') = new ComputationNode [ operation = 'SumColumnElements' ; inputs = z /*plus the function args*/ ] // deprecated
SumElements(matrix, tag='') = new ComputationNode [ operation = 'SumElements' ; inputs = matrix /*plus the function args*/ ]
# ^^ TODO: Rename to ReduceSumMB?
Tanh(z, tag='') = new ComputationNode [ operation = 'Tanh' ; inputs = z /*plus the function args*/ ]
@ -212,7 +219,7 @@ Boolean = [
# select a value
# Note: This will be replaced by BrainScript 'if cond then thenVal else elseVal' and SwitchNode
If (cond, thenVal, elseVal) = cond .* thenVal + Not (cond) .* elseVal
If (cond, thenVal, elseVal, tag='') = new ComputationNode [ operation = 'If' ; inputs = (cond : thenVal : elseVal) /*plus the function args*/ ]
]
##############################################################################
@ -223,16 +230,24 @@ Boolean = [
Sequences = [
# broadcast a single-step sequence to a multi-step sequence
BroadcastSequenceAs (type, data1) = [ # type=example sequence with desired length (outside of a loop), data1=1 time step
ZeroSequenceLike (x) = RowSlice (0, 1, x) .* Constants.Zero # BUGBUG: SumColumnElements() has a CPU/GPU problem
index = /*Constants.*/ZeroSequenceLike (type) # create an index sequence [ 0 0 0 ... ] of target length
packedIndex = PackedIndex (data1, index) # convert into internal packed index w.r.t. 'data1'
out = GatherPacked (packedIndex, data1) # copy data1[0] to all elements, total length like 'type'
# BUGBUG: This should work but gives worse results.
#ZeroSequenceLike (x) = RowSlice (0, 1, x) .* Constants.Zero # BUGBUG: SumColumnElements() has a CPU/GPU problem
#index = /*Constants.*/ZeroSequenceLike (type) # create an index sequence [ 0 0 0 ... ] of target length
#packedIndex = PackedIndex (data1, index) # convert into internal packed index w.r.t. 'data1'
#out = GatherPacked (packedIndex, data1) # copy data1[0] to all elements, total length like 'type'
# alternative (slower, older) implementation (10% slower end-to-end?)
# Gives nearly the same result, but not completely. Since Gather() above has an atomicAdd(), let's leave this on for now and check later.
dataPadded = Sequences.Scatter (Loop.IsFirst (type), data1) # padded with zeroes until end of target sequence
out = Boolean.If (Loop.IsFirst (dataPadded), # if first entry
/*then*/ dataPadded, # then copy that
/*else*/ Loop.Previous (out)) # else just propagate to the front
].out
# rolling window over past N samples
# returns a record [ value=..., valid=... ]
# returns a record [ value=..., valid=... ], both being 1-step sequences of [dim x N]. N can optionally be moved to axes >2.
# This implementation is suboptimal in that it creates copies for the intermediate steps.
PastValueWindow (N, in) = [
PastValueWindow (N, in, axis=2) = [
delayLine[t:0..N-1] = [ # shift register for encoder, last N inputs
value = if t == 0
then in # delay 0: current value
@ -243,8 +258,12 @@ Sequences = [
]
# delayLine[t].value = value of t steps in the past
# delayLine[t].valid = true if we had a value t steps in the past
value = Slice (-1, 0, axis=-1, SplitDimension (RowStack (array[0..N-1](t=>delayLine[t].value)), 1, N)) # [i, delay]
valid = Slice (-1, 0, axis=-1, SplitDimension (RowStack (array[0..N-1](t=>delayLine[t].valid)), 1, N)) # [i, delay]
SplitStack (x) =
if axis == 2 then SplitDimension (x, 1, N)
else if axis > 2 then TransposeDimensions (SplitDimension (x, 1, N), 2, axis)
else Fail ("PastValueWindow: axis>2 required.") # BUGBUG: We also require that input is a single vector. Address later.
value = Slice (-1, 0, axis=-1, SplitStack (RowStack (array[0..N-1](t=>delayLine[t].value)))) # [i, delay]
valid = Slice (-1, 0, axis=-1, SplitStack (RowStack (array[0..N-1](t=>delayLine[t].valid)))) # [i, delay]
]
# fold left/right: Reduce entire sequence by applying binaryOp, e.g. FoldL (Plus, 0, input)
@ -287,7 +306,7 @@ Sequences = [
out = Gather (selected, x)
].out
# Last and TakeRight
Last(x) = TakeRight(1, x)
Last (x) = TakeRight (1, x)
TakeRight (N, x) = _Take (FutureValue, N, x)
Skip (N, x) = if N > 0 then _Skip (PastValue, N, x) else x
_Skip (DelayFn, N, x) = [ // TODO: merge with _Take
@ -374,14 +393,17 @@ Parameters =
BiasParam (dim) = ParameterTensor ((dim), init='fixedValue', value=0.0)
ScalarParam() = BiasParam (1)
# route input through an extra scalar weight, for stabilization
Stabilize (x, enabled=true) =
# route input through an extra weight, for stabilization
StabilizeElements (x, inputDim=x.dim, enabled=true) =
if enabled
then [
beta = Exp (ScalarParam())
result = Scale (beta, x)
].result
else x
beta = Exp (BiasParam ((inputDim)))
result = beta .* x
].result
else x
# and the same with a scalar stabilizer shared across all components
Stabilize (x, enabled=true) = if enabled then StabilizeElements (x, inputDim=1, enabled=true) else x
]
##############################################################################
@ -393,19 +415,20 @@ RNNs =
# LSTMP -- LSTM function with projection and self-stabilization
# Projection is enabled by passing different values for outputDim and cellDim.
# This is the stateless version that takes the previous state as an input.
# It returns a dictionary with two members: h and c. prevState must be in the same format.
// TODO: Standardize on one parameter order. Is first dimension the output (like in math, strcpy, or functional style) or the input (listing inputs first)?
// If we change this, we'd need to fix the LSTM end-to-end test.
LSTMP (inputDim, outputDim, cellDim, x, prevState, enableSelfStabilization=false) =
# It returns a dictionary with three members: h and c, and dim=h.dim for convenience. prevState must have h and c.
LSTMP (outputDim, cellDim=outputDim, x, inputDim=x.dim, prevState, enableSelfStabilization=false) =
[
#inputDim = x.dim # get dimension from 'x' (if this works, we can remove the inputDim1 parameter)
_privateInnards = [ // encapsulate the privateInnards workings
# TODO: rename to just _
_privateInnards = [ // encapsulate the inner workings
dh = prevState.h // previous values
dc = prevState.c
// parameter macros--these carry their own weight matrices
B() = Parameters.BiasParam (cellDim)
#inputDim1 = inputDim
#W(v) = Parameters.WeightParam (cellDim, inputDim) * Parameters.StabilizeElements (v, inputDim=inputDim1, enabled=enableSelfStabilization) // input-to-hidden
# ^^ element-wise stab, use if input is a concatenation; vv stab for entire matrix
W(v) = Parameters.WeightParam (cellDim, inputDim) * Parameters.Stabilize (v, enabled=enableSelfStabilization) // input-to-hidden
H(h) = Parameters.WeightParam (cellDim, outputDim) * Parameters.Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
C(c) = Parameters.DiagWeightParam (cellDim) .* Parameters.Stabilize (c, enabled=enableSelfStabilization) // cell-to-hiddden (note: applied elementwise)
@ -423,55 +446,52 @@ RNNs =
ht = ot .* Tanh (ct) // applied to tanh(cell(t))
]
// LSTM cell
# TODO: This is temporary test code for the new ShiftNode (until we switch PastValue() itself over)
#PastValueShift(dimDummy, input) = Shift(input, /*fromOffsets=*/-1, /*boundaryValue=*/Constant(0.1), dim=-1)
#PastValue1 = PastValue
#PastValue1 = PastValueShift
# our return values
c = _privateInnards.ct // cell value
h = if outputDim != cellDim // output/hidden state
then [ // project
Wmr = Parameters.WeightParam(outputDim, cellDim);
Wmr = Parameters.WeightParam (outputDim, cellDim);
htp = Wmr * Parameters.Stabilize (_privateInnards.ht, enabled=enableSelfStabilization)
].htp // TODO: ^^ extend BS syntax to allow to say: then [ Wmr = WeightParam(outputDim, cellDim) ] in Wmr * Stabilize (...)
else _privateInnards.ht // no projection
dim = outputDim
]
# helper function to delay h and c
# Callers can provide their own, e.g. useful for beam decoding.
PreviousHC (lstmState) = [
h = Loop.Previous (lstmState.h) // hidden state(t-1)
c = Loop.Previous (lstmState.c) // cell(t-1)
]
# pass previousHook=BS.RNNs.NextHC instead of PreviousHC to get a right-to-left recurrence
NextHC (lstmState) = [
h = Loop.Next (lstmState.h) // hidden state(t-1)
c = Loop.Next (lstmState.c) // cell(t-1)
]
# this implements a recurrent (stateful) LSTM with projection and self-stabilization
RecurrentLSTMP (inputDim, outputDim, cellDim, x, enableSelfStabilization=false) =
# It returns a record (h,c). To use its output, say .h
# By default, this is left-to-right. Pass previousHook=BS.RNNs.NextHC for a right-to-left model.
# TODO: remove the -2 once this works
RecurrentLSTMP = RecurrentLSTMP2
RecurrentLSTMP2 (outputDim, cellDim=outputDim.dim, x, inputDim=x.dim, previousHook=PreviousHC, enableSelfStabilization=false) =
[
prevState =
[
h = Loop.Previous (lstmState.h) // hidden state(t-1)
c = Loop.Previous (lstmState.c) // cell(t-1)
]
enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
lstmState = LSTMP (inputDim, outputDim, cellDim, x, prevState, enableSelfStabilization=enableSelfStabilization1)
].lstmState.h // that's the value we return
# same as RecurrentLSTMP but returns both h and c
RecurrentLSTMP2 (inputDim, outputDim, cellDim, x, enableSelfStabilization=false) =
[
prevState =
[
h = Loop.Previous (lstmState.h) # hidden state(t-1)
c = Loop.Previous (lstmState.c) # cell(t-1)
]
enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
lstmState = BS.RNNs.LSTMP (inputDim, outputDim, cellDim, x, prevState, enableSelfStabilization=enableSelfStabilization1)
].lstmState // that's the value we return
prevState = previousHook (lstmState)
inputDim1 = inputDim ; cellDim1 = cellDim ; enableSelfStabilization1 = enableSelfStabilization // TODO: BS syntax needs to allow to say ^.enableSelfStabilization
lstmState = BS.RNNs.LSTMP (outputDim, cellDim=cellDim1, x, inputDim=inputDim1, prevState, enableSelfStabilization=enableSelfStabilization1)
].lstmState // we return the state record (h,c)
# a stack of recurrent LSTMs (unidirectional)
RecurrentLSTMP2Stack (input, inputDim, hiddenDims, cellDims, enableSelfStabilization=false) = [
useStabilizer = enableSelfStabilization
layer[i:0..Length (hiddenDims)-1] =
RecurrentLSTMP2 (if i == 0 then inputDim else hiddenDims[i-1],
hiddenDims[i], cellDims[i],
if i == 0 then input else layer[i-1].h,
RecurrentLSTMPStack = RecurrentLSTMP2Stack # TODO: remove the -2 name once this works
RecurrentLSTMP2Stack (hiddenDims, cellDims=hiddenDims, input, inputDim=input.dim, previousHook=PreviousHC, enableSelfStabilization=false) = [
previousHook1 = previousHook ; useStabilizer = enableSelfStabilization
layers[i:0..Length (hiddenDims)-1] =
RecurrentLSTMP2 (hiddenDims[i], cellDim=cellDims[i],
if i == 0 then input else layers[i-1].h, inputDim=if i == 0 then inputDim else hiddenDims[i-1] /*TODO: layers[i-1].dim*/,
previousHook=previousHook1,
enableSelfStabilization=useStabilizer)
].layer
].layers
]
##############################################################################

Просмотреть файл

@ -80,7 +80,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>ActionsLib.lib; SGDLib.lib; ComputationNetworkLib.lib; Math.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ActionsLib.lib; SGDLib.lib; ComputationNetworkLib.lib; Math.lib; Common.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>Math.dll; msmpi.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
<StackReserveSize>100000000</StackReserveSize>
</Link>
@ -107,7 +107,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ActionsLib.lib; SGDLib.lib; ComputationNetworkLib.lib; Math.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ActionsLib.lib; SGDLib.lib; ComputationNetworkLib.lib; Math.lib; Common.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<DelayLoadDLLs>Math.dll; msmpi.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
<StackReserveSize>100000000</StackReserveSize>
@ -124,12 +124,8 @@
<DelayLoadDLLs>Math.dll; msmpi.dll;</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Message Condition="'$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">Copying dependencies</Message>
<Command Condition="'$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">xcopy /I /D /Y $(ProjectDir)BrainScript\CNTKCoreLib\CNTK.core.bs $(TargetDir)</Command>
</PostBuildEvent>
<PostBuildEvent>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">Copying dependencies</Message>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">xcopy /I /D /Y $(ProjectDir)BrainScript\CNTKCoreLib\CNTK.core.bs $(TargetDir)</Command>
<Message>Copying dependencies</Message>
<Command>xcopy /I /D /Y "$(ProjectDir)BrainScript\CNTKCoreLib\CNTK.core.bs" "$(TargetDir)"</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(GpuBuild)">
@ -140,7 +136,7 @@
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaLibPath)</AdditionalLibraryDirectories>
</Link>
<PostBuildEvent>
<Command>xcopy /I /D /Y $(ProjectDir)BrainScript\CNTKCoreLib\CNTK.core.bs $(TargetDir) &amp;&amp; if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Command>xcopy /I /D /Y "$(ProjectDir)BrainScript\CNTKCoreLib\CNTK.core.bs" "$(TargetDir)" &amp;&amp; if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)"</Command>
<Message>Copying dependencies</Message>
</PostBuildEvent>
</ItemDefinitionGroup>
@ -192,21 +188,6 @@
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Common\Config.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\DataReader.cpp" />
<ClCompile Include="..\Common\DataWriter.cpp" />
<ClCompile Include="..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\Common\File.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\fileutil.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\Include\ConcStack.h" />
<ClCompile Include="..\Common\TimerUtility.cpp" />
<ClCompile Include="..\Common\MPIWrapper.cpp" />
<ClCompile Include="BrainScript\BrainScriptEvaluator.cpp" />
<ClCompile Include="BrainScript\BrainScriptParser.cpp" />
<ClCompile Include="BrainScript\BrainScriptTest.cpp" />

Просмотреть файл

@ -0,0 +1,73 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug_CpuOnly|x64">
<Configuration>Debug_CpuOnly</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release_CpuOnly|x64">
<Configuration>Release_CpuOnly</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{86883653-8A61-4038-81A0-2379FAE4200A}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>CNTK</RootNamespace>
<ProjectName>Common</ProjectName>
</PropertyGroup>
<PropertyGroup Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
</PropertyGroup>
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ItemDefinitionGroup>
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math;$(MSMPI_INC)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<PreprocessorDefinitions>_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(CpuOnlyBuild)">
<ClCompile>
<PreprocessorDefinitions>%(PreprocessorDefinitions);CPUONLY</PreprocessorDefinitions>
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="Config.cpp" />
<ClCompile Include="DataReader.cpp" />
<ClCompile Include="DataWriter.cpp" />
<ClCompile Include="Eval.cpp" />
<ClCompile Include="ExceptionWithCallStack.cpp" />
<ClCompile Include="File.cpp" />
<ClCompile Include="fileutil.cpp" />
<ClCompile Include="MPIWrapper.cpp" />
<ClCompile Include="TimerUtility.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>

Просмотреть файл

@ -149,8 +149,15 @@ void File::Init(const wchar_t* filename, int fileOptions)
/*static*/ wstring File::DirectoryPathOf(wstring path)
{
#ifdef _WIN32
// Win32 accepts forward slashes, but it seems that PathRemoveFileSpec() does not
// TODO:
// "PathCchCanonicalize does the / to \ conversion as a part of the canonicalization, its
// probably a good idea to do that anyway since I suspect that the '..' characters might
// confuse the other PathCch functions" [Larry Osterman]
// "Consider GetFullPathName both for canonicalization and last element finding." [Jay Krell]
path = msra::strfun::ReplaceAll<wstring>(path, L"/", L"\\");
HRESULT hr;
path = msra::strfun::ReplaceAll<wstring>(path, L"/", L"\\"); // Win32 accepts forward slashes, but it seems that PathRemoveFileSpec() does not
if (IsWindows8OrGreater()) // PathCchRemoveFileSpec() only available on Windows 8+
{
typedef HRESULT(*PathCchRemoveFileSpecProc)(_Inout_updates_(_Inexpressible_(cchPath)) PWSTR, _In_ size_t);

Просмотреть файл

@ -36,6 +36,9 @@ enum NodeGroup
};
// IEvaluateModel - interface used by decoders and other components that need just evaluator functionality in DLL form
// NOTICE: This interface is a public interface for evaluating models in CNTK.
// Changes to this interface may affect other projects, such as Argon and LatGen,
// and therefore need to be communicated with such groups.
template <class ElemType>
class IEvaluateModel // Evaluate Model Interface
{

Просмотреть файл

@ -487,7 +487,7 @@ private:
void CheckIsValid() const
{
if (m_numFramesDeclared != GetNumCols())
LogicError("MBLayout: Attempting to read out flags, but only only %d out of %d frames have been defined.",
LogicError("MBLayout: Attempting to read out flags, but only %d out of %d frames have been defined.",
(int) m_numFramesDeclared, (int) (m_numTimeSteps * m_numParallelSequences));
}

Просмотреть файл

@ -701,6 +701,13 @@ public:
return s;
}
// pretty-printing, wstring version
operator std::wstring() const
{
std::string s = this->operator std::string();
return msra::strfun::utf16(s);
}
private:
// reset m_strides and m_offset to represent a canonical no-strides column-major tensor
void InitAsNoSlice()

Просмотреть файл

@ -29,8 +29,17 @@ struct ComputationEnvironment
{
// networkOperationMode tells whether we are training or inferring, which affects some nodes' behavior
NetworkOperationMode m_networkOperationMode = NetworkOperationMode::inferring; // by default, a network is always able to infer
bool IsInferring() const { return m_networkOperationMode == NetworkOperationMode::inferring; }
bool IsTraining() const { return m_networkOperationMode == NetworkOperationMode::training; }
bool IsPreComputing() const { return m_networkOperationMode == NetworkOperationMode::preComputing; }
// Set the new operation mode and return the previous one.
NetworkOperationMode SetOperationMode(NetworkOperationMode mode)
{
NetworkOperationMode oldMode = m_networkOperationMode;
m_networkOperationMode = mode;
return oldMode;
}
// more properties should be added here as needed
};
typedef std::shared_ptr<ComputationEnvironment> ComputationEnvironmentPtr;
@ -48,12 +57,11 @@ public:
ScopedNetworkOperationMode(const std::shared_ptr<ComputationNetwork>& net, NetworkOperationMode networkOperationMode) :
m_environment(net->Environment())
{
m_previousNetworkOperationMode = m_environment.m_networkOperationMode;
m_environment.m_networkOperationMode = networkOperationMode;
m_previousNetworkOperationMode = m_environment.SetOperationMode(networkOperationMode);
}
~ScopedNetworkOperationMode() // destructor restores the previous mode
{
m_environment.m_networkOperationMode = m_previousNetworkOperationMode;
m_environment.SetOperationMode(m_previousNetworkOperationMode);
}
};

Просмотреть файл

@ -776,8 +776,8 @@ void ComputationNetwork::DescribeNetworkUsingDot(list<ComputationArc>& arcs,
for (const auto& x : allnodes)
{
line.clear();
line = msra::strfun::wstrprintf(L" \"%ls\" [ label = \"%ls [%s%ls]\\n%ls\" ] ;\n",
x->GetName().c_str(), x->GetName().c_str(), string(x->GetSampleLayout()).c_str(), x->GetMBLayoutAxisString().c_str(),
line = msra::strfun::wstrprintf(L" \"%ls\" [ label = \"%ls [%ls%ls]\\n%ls\" ] ;\n",
x->GetName().c_str(), x->GetName().c_str(), wstring(x->GetSampleLayout()).c_str(), x->HasMBLayout() ? L" x *" : L"",
x->OperationName().c_str());
fstream << line;
}
@ -851,7 +851,7 @@ void ComputationNetwork::DescribeNetworkUsingDot(list<ComputationArc>& arcs,
fstream << L"\n}\n";
}
void ComputationNetwork::PlotNetworkTopology(const wstring outputFile) // [1/13/2015 erw] plot network topology using dot language
void ComputationNetwork::PlotNetworkTopology(const wstring& outputFile)
{
VerifyIsCompiled("PlotNetworkTopology");
// ValidateNetwork(false, true);

Просмотреть файл

@ -188,6 +188,7 @@ public:
void AllocateAllMatrices(const std::vector<ComputationNodeBasePtr>& evalRootNodes, const std::vector<ComputationNodeBasePtr>& outValueRootNodes, ComputationNodeBasePtr trainRootNode);
private:
template <class ElemType> void PrintMemorySharingStructure(const std::vector<ComputationNodeBasePtr>& nodes);
void ReleaseMatricesAfterEvalForChildren(ComputationNodeBasePtr n, std::unordered_map<ComputationNodeBasePtr, int>& parentCount);
void AllocateGradientMatricesForInputs(ComputationNodeBasePtr parentNode);
@ -832,7 +833,7 @@ private:
public:
void DescribeNetworkUsingDot(std::list<ComputationArc>& arcs, std::wstring outFile);
void PlotNetworkTopology(const std::wstring outputFile);
void PlotNetworkTopology(const std::wstring& outputFile);
// -----------------------------------------------------------------------
// scripting integration

Просмотреть файл

@ -9,18 +9,19 @@
#include "Basics.h"
#include "ComputationNetworkBuilder.h"
#include "ComputationNode.h"
#include "ConvolutionalNodes.h"
#include "DeprecatedNodes.h"
#include "EvaluationNodes.h"
#include "InputAndParamNodes.h"
#include "LinearAlgebraNodes.h"
#include "NonlinearityNodes.h"
#include "ConvolutionalNodes.h"
#include "RecurrentNodes.h"
#include "ReshapingNodes.h"
#include "PreComputeNodes.h"
#include "TrainingNodes.h"
#include "EvaluationNodes.h"
#include "ReshapingNodes.h"
#include "RecurrentNodes.h"
#include "SpecialPurposeNodes.h"
#include "TrainingNodes.h"
#include <string>
@ -39,6 +40,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
#endif
if (nodeType == OperationNameOf(AbsNode)) return New<AbsNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode))return New<ClassBasedCrossEntropyWithSoftmaxNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ClipNode)) return New<ClipNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(CosDistanceNode)) return New<CosDistanceNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(CosDistanceWithNegativeSamplesNode)) return New<CosDistanceWithNegativeSamplesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(CosineNode)) return New<CosineNode<ElemType>>(forward<_Types>(_Args)...);
@ -53,12 +55,14 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(EnvironmentInputNode)) return New<EnvironmentInputNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ErrorPredictionNode)) return New<ErrorPredictionNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ExpNode)) return New<ExpNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(FloorNode)) return New<FloorNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(FutureValueNode)) return New<FutureValueNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(GatherPackedNode)) return New<GatherPackedNode<ElemType>>(forward<_Types>(_Args)...);
#ifdef COMING_SOON
else if (nodeType == OperationNameOf(GMMLogLikelihoodNode)) return New<GMMLogLikelihoodNode<ElemType>>(forward<_Types>(_Args)...);
#endif
else if (nodeType == OperationNameOf(HardmaxNode)) return New<HardmaxNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(IfNode)) return New<IfNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(InvStdDevNode)) return New<InvStdDevNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(KhatriRaoProductNode)) return New<KhatriRaoProductNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(LogNode)) return New<LogNode<ElemType>>(forward<_Types>(_Args)...);
@ -79,6 +83,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(ReconcileDynamicAxisNode)) return New<ReconcileDynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ReciprocalNode)) return New<ReciprocalNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RectifiedLinearNode)) return New<RectifiedLinearNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ReduceElementsNode)) return New<ReduceElementsNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ReshapeNode)) return New<ReshapeNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RowRepeatNode)) return New<RowRepeatNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RowStackNode)) return New<RowStackNode<ElemType>>(forward<_Types>(_Args)...);
@ -417,6 +422,12 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Class
return net.AddNodeToNetAndAttachInputs(New<ClassBasedCrossEntropyWithSoftmaxNode<ElemType>>(net.GetDeviceId(), nodeName), { label, prediction, input_weight, cls_log_post_prob });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Clip(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<ClipNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b, c });
}
#ifdef COMING_SOON
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CRF(const ComputationNodePtr label,
@ -530,12 +541,24 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Abs(c
return net.AddNodeToNetAndAttachInputs(New<AbsNode<ElemType>>(net.GetDeviceId(), nodeName), { a });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Floor(const ComputationNodePtr a, const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<FloorNode<ElemType>>(net.GetDeviceId(), nodeName), { a });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Hardmax(const ComputationNodePtr a, const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<HardmaxNode<ElemType>>(net.GetDeviceId(), nodeName), { a });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::If(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<IfNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b, c });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Softmax(const ComputationNodePtr a, const std::wstring nodeName)
{

Просмотреть файл

@ -99,6 +99,7 @@ public:
#endif
ComputationNodePtr Abs(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr ClassCrossEntropyWithSoftmax(const ComputationNodePtr label, const ComputationNodePtr prediction, const ComputationNodePtr input_weight, const ComputationNodePtr cls_log_post_prob, const std::wstring nodeName = L"");
ComputationNodePtr Clip(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName = L"");
ComputationNodePtr Cos(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr CosDistance(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr CrossEntropy(const ComputationNodePtr label, const ComputationNodePtr prediction, const std::wstring nodeName = L"");
@ -111,11 +112,13 @@ public:
ComputationNodePtr DynamicAxis(const ComputationNodePtr a, const std::wstring& nodeName = L"");
ComputationNodePtr ErrorPrediction(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr Exp(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr Floor(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr FutureValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, size_t timeStep, const std::wstring nodeName = L"");
#ifdef COMING_SOON
ComputationNodePtr GMMLogLikelihood(const ComputationNodePtr unnormedPrior, const ComputationNodePtr mean, const ComputationNodePtr logStddev, const ComputationNodePtr feature, const std::wstring nodeName = L"");
#endif
ComputationNodePtr Hardmax(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr If(const ComputationNodePtr a, const ComputationNodePtr b, const ComputationNodePtr c, const std::wstring nodeName = L"");
ComputationNodePtr InvStdDev(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr KhatriRaoProduct(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr Log(const ComputationNodePtr a, const std::wstring nodeName = L"");

Просмотреть файл

@ -105,7 +105,7 @@ ComputationNodeBasePtr ComputationNetwork::GetNestedNetwork(const ComputationNod
ComputationNetwork::PARTraversalFlowControlNode::PARTraversalFlowControlNode(const std::vector<shared_ptr<SEQTraversalFlowControlNode>>& recurrentInfo, const std::list<ComputationNodeBasePtr>& allNodes /*must be in eval order*/)
{
// traverse the network in evaluation order and create a new list that replaces all recurrence by a SEQTraversalFlowControlNode
set<shared_ptr<IComputationNode>> loopsSeen; // for consistency check only
std::set<shared_ptr<IComputationNode>> loopsSeen; // for consistency check only
for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end();)
{
shared_ptr<SEQTraversalFlowControlNode> recInfo = FindInRecurrentLoops(recurrentInfo, *nodeIter); // check if this node participates in a recurrent loop
@ -803,6 +803,40 @@ void ComputationNetwork::MarkValueNonSharableNodes()
}
}
template <class ElemType>
void ComputationNetwork::PrintMemorySharingStructure(const std::vector<ComputationNodeBasePtr>& nodes)
{
std::map <const Matrix<ElemType>*, std::set<wstring>> memSharingStructure;
for (auto& n : nodes)
{
ComputationNode<ElemType>* node = n->As<ComputationNode<ElemType>>();
std::set<std::pair<const Matrix<ElemType>*, const std::wstring>> matrixInfo = node->GetMatrixInfo();
for (const auto&item : matrixInfo)
{
const Matrix<ElemType>* matrix = item.first;
if (memSharingStructure.find(matrix) == memSharingStructure.end())
memSharingStructure.insert(std::pair<const Matrix<ElemType>*, std::set<wstring>>(matrix, std::set<wstring>()));
std::set<wstring>& s = memSharingStructure[matrix];
s.insert(item.second);
}
}
fprintf(stderr, "\nMemory Sharing Structure:\n\n");
for (const auto& item : memSharingStructure)
{
const std::set<wstring>& s = item.second;
fprintf(stderr, "%p: {", item.first);
for (const auto& memShareInfo: s)
{
fprintf(stderr, "[%ls] ", memShareInfo.c_str());
}
fprintf(stderr, "}\n");
}
fprintf(stderr, "\n");
}
// this function will need to be called before actual validation and execution to
// predetermine how to share matrices to reduce memory usage.
// TODO: find a simple topological order and allocateEvalMatrices on that order directly
@ -947,6 +981,18 @@ void ComputationNetwork::AllocateAllMatrices(const std::vector<ComputationNodeBa
}
m_areMatricesAllocated = true;
// Print the memory sharing structure.
std::vector<ComputationNodeBasePtr> allNodes = GetAllNodes();
if (allNodes.size() == 0)
LogicError("Network has no computation node.");
if (allNodes[0]->Is<ComputationNode<float>>())
PrintMemorySharingStructure<float>(allNodes);
else if (allNodes[0]->Is<ComputationNode<double>>())
PrintMemorySharingStructure<double>(allNodes);
else
LogicError("Unexpected node precision type.");
}
void ComputationNetwork::ReleaseMatricesAfterEvalForChildren(ComputationNodeBasePtr n, std::unordered_map<ComputationNodeBasePtr, int>& parentCount)

Просмотреть файл

@ -24,95 +24,45 @@
<RootNamespace>CNTK</RootNamespace>
<ProjectName>ComputationNetworkLib</ProjectName>
</PropertyGroup>
<PropertyGroup Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
</PropertyGroup>
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="$(DebugBuild)" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="$(DebugBuild)">
<LinkIncremental>true</LinkIncremental>
<PreBuildEventUseInBuild>false</PreBuildEventUseInBuild>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)">
<LinkIncremental>false</LinkIncremental>
<ExecutablePath>$(ExecutablePath)</ExecutablePath>
<PreBuildEventUseInBuild>false</PreBuildEventUseInBuild>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(SolutionDir)Source\ActionsLib;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
<PrecompiledHeader>
</PrecompiledHeader>
<PreprocessorDefinitions>WIN32;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(MSMPI_LIB64);$(OutDir);$(NvmlLib)</AdditionalLibraryDirectories>
<AdditionalDependencies>Math.lib;%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>Math.dll</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_SCL_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<OpenMPSupport>true</OpenMPSupport>
<TreatWarningAsError>true</TreatWarningAsError>
<PreprocessorDefinitions>_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib; kernel32.lib; user32.lib; shell32.lib; %(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>Math.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
<StackReserveSize>100000000</StackReserveSize>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
<TreatWarningAsError>true</TreatWarningAsError>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib; kernel32.lib; user32.lib; shell32.lib; %(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<DelayLoadDLLs>Math.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(CpuOnlyBuild)">
<ClCompile>
<PreprocessorDefinitions>CPUONLY;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<DelayLoadDLLs>Math.dll</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(GpuBuild)">
<ClCompile>
@ -120,9 +70,10 @@
</ClCompile>
<Link>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaLibPath)</AdditionalLibraryDirectories>
<DelayLoadDLLs>%(DelayLoadDLLs);nvml.dll;$(CudaRuntimeDll)</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Command>if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Command>if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)"</Command>
<Message>Copying NVidia GDK extension DLL to target folder</Message>
</PostBuildEvent>
</ItemDefinitionGroup>
@ -144,6 +95,7 @@
<ClInclude Include="ComputationNetworkBuilder.h" />
<ClInclude Include="ComputationNode.h" />
<ClInclude Include="ConvolutionalNodes.h" />
<ClInclude Include="DeprecatedNodes.h" />
<ClInclude Include="PreComputeNodes.h" />
<ClInclude Include="SpecialPurposeNodes.h" />
<ClInclude Include="EvaluationNodes.h" />
@ -159,13 +111,6 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Common\BestGpu.cpp" />
<ClCompile Include="..\Common\File.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\fileutil.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\TimerUtility.cpp" />
<ClCompile Include="ComputationNetwork.cpp" />
<ClCompile Include="ComputationNetworkAnalysis.cpp" />
<ClCompile Include="ComputationNetworkBuilder.cpp" />
@ -181,4 +126,4 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
</Project>

Просмотреть файл

@ -138,6 +138,9 @@
<ClInclude Include="ComputationEnvironment.h">
<Filter>Environment</Filter>
</ClInclude>
<ClInclude Include="DeprecatedNodes.h">
<Filter>Nodes</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Common">

Просмотреть файл

@ -158,6 +158,84 @@ void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool all
SetDims(TensorShape(dims), HasMBLayout());
}
// N-ary zip operation, e.g. ternary zip for clip().
// Validates a node whose output is the elementwise combination of 'numInputs' inputs,
// with the output tensor shape being, per axis, the max over all input shapes.
// If allowBroadcast then one input may be a sub-dimension of the other (if layout then only for rows, otherwise for cols, too).
// This also helpfully resizes the children if not yet sized.
// NOTE(review): allowBroadcast is not referenced in this body — confirm whether that is intended.
void ComputationNodeBase::ValidateNaryZip(bool isFinalValidationPass, bool allowBroadcast, size_t numInputs)
{
    assert(m_inputs.size() == numInputs);
    ComputationNodeBase::Validate(isFinalValidationPass);
    InferMBLayoutFromInputsForStandardCase(isFinalValidationPass);
    ValidateInferNaryInputDims(numInputs);

    // check minibatch layout consistency for all possible pairs (n choose 2)
    if (isFinalValidationPass)
        for (size_t i = 0; i < numInputs; i++)
            for (size_t j = i+1; j < numInputs; j++)
                if (Input(i)->GetMBLayout() != Input(j)->GetMBLayout() && Input(i)->HasMBLayout() && Input(j)->HasMBLayout())
                    LogicError("%ls: Minibatch layouts are not the same between arguments and might get out of sync during runtime. If this is by design, use ReconcileDynamicAxis() to forward layouts between nodes.", NodeDescription().c_str());

    // result has tensor shape with dimensions being the max over all inputs
    let shape0 = GetInputSampleLayout(0);

    // maxRank = highest rank over all inputs; lower-rank inputs have implied trailing singleton dims
    size_t maxRank = shape0.GetRank();
    for (size_t i = 1; i < numInputs; i++)
    {
        let shape = GetInputSampleLayout(i);
        if (shape.GetRank() > maxRank)
            maxRank = shape.GetRank();
    }

    SmallVector<size_t> dims = shape0.GetDims();
    dims.resize(maxRank, 1); // pad with 1

    // first check for invalid dimensions: any two inputs that both specify a
    // dimension > 1 for the same axis must agree on it
    for (size_t k = 0; k < maxRank; k++)
    {
        size_t maxDim = 0;
        TensorShape maxShape = shape0; // arbitrary; this is just used for the error message
        for (size_t i = 0; i < numInputs; i++)
        {
            let currentShape = GetInputSampleLayout(i);
            size_t currentRank = currentShape.GetRank();
            // make sure that the rank of this input is bigger than the current index (otherwise, these are implied singleton dimensions that do not need to be checked)
            if (currentRank > k)
            {
                size_t currentDim = currentShape[k];
                if (currentDim > 1 && maxDim != currentDim && maxDim > 1) // 1=broadcasting, 0=not known yet, meant to be inferred
                {
                    InvalidArgument("%ls: Input dimensions [%s] and [%s] are not compatible.",
                                    NodeDescription().c_str(), string(maxShape).c_str(), string(currentShape).c_str());
                }
                else if (currentDim > maxDim)
                {
                    maxDim = currentDim;
                    maxShape = currentShape;
                }
            }
        }
    }

    // now set up the right dims: take the first known dimension > 1 per axis
    for (size_t k = 0; k < maxRank; k++)
    {
        for (size_t i = 0; i < numInputs; i++)
        {
            let shape = GetInputSampleLayout(i);
            if (shape.GetRank() > k)
            {
                size_t dim = shape[k];
                // 0 means "not known yet"; keep any dimension already resolved to > 1
                if (dims[k] <= 1 && dim != 0)
                    dims[k] = dim;
            }
        }
    }

    SetDims(TensorShape(dims), HasMBLayout());
}
// unary reduce-to-(1,1) operation, e.g. MatrixL1RegNode
void ComputationNodeBase::ValidateUnaryReduce(bool isFinalValidationPass)
{
@ -215,6 +293,30 @@ void ComputationNodeBase::ValidateInferBinaryInputDims()
}
}
// As the binary case above, but for N-ary operations: every input borrows
// any dimensions it has not yet specified from each of the other inputs.
void ComputationNodeBase::ValidateInferNaryInputDims(size_t numInputs)
{
    // Limited inference of children dimensions: if a dimension is not specified,
    // we assume the operands' dimensions should match.
    // NOTE: We only require >= numInputs here, since this is called from nodes that
    // may have more than 'numInputs' children. The exact child count is formally
    // verified elsewhere, so this will not break consistency.
    assert(m_inputs.size() >= numInputs);
    for (size_t i = 0; i < numInputs; i++)
    {
        const auto& self = Input(i);
        for (size_t j = 0; j < numInputs; j++)
        {
            if (j == i)
                continue;
            // borrow any unset dimension on input i from input j
            self->ValidateInferInputDimsFrom(Input(j)->GetSampleLayout());
        }
    }
}
// in case of an error, we just back out, and leave it to outside code to detect errors
template <class ElemType>
void ComputationNode<ElemType>::ValidateInferInputDimsFrom(const TensorShape& otherShape)
@ -252,7 +354,7 @@ TensorShape ComputationNodeBase::GetTensorShape(size_t rank) const
TensorShape tensorShape = GetSampleLayout(); // TODO: Do we need to expect this tensor to have arbitrary strides? In case it came out of a Slice, Reshape, or Transpose op in-place?
if (HasMBLayout())
{
size_t i = rank;
size_t i = (rank != SIZE_MAX) ? rank : tensorShape.GetRank();
tensorShape.AppendInPlace(i++, GetMBLayout()->GetNumParallelSequences());
tensorShape.AppendInPlace(i++, GetMBLayout()->GetNumTimeSteps());
}

Просмотреть файл

@ -649,8 +649,10 @@ protected:
void ValidateUnaryMap(bool isFinalValidationPass);
void ValidateUnaryReduce(bool isFinalValidationPass);
void ValidateInferBinaryInputDims();
void ValidateInferNaryInputDims(size_t numInputs);
void ValidateBinaryZip(bool isFinalValidationPass, bool allowBroadcast);
void ValidateBinaryReduce(bool isFinalValidationPass);
void ValidateNaryZip(bool isFinalValidationPass, bool allowBroadcast, size_t numInputs);
void InferMBLayoutFromInputsForStandardCase(bool isFinalValidationPass);
virtual void ValidateInferInputDimsFrom(const TensorShape&) = 0; // (implemented by ComputationNode<ElemType>)
@ -1318,7 +1320,7 @@ public:
void UpdateFunctionValuesSize()
{
UpdateDataSize(Value());
Value().CollapseDataLocationAfterWriting(); // actually before writing, should change the name
Value().CollapseDataLocation(); // actually before writing, should change the name
}
// -----------------------------------------------------------------------
@ -1420,6 +1422,16 @@ public:
// memory sharing
// -----------------------------------------------------------------------
// This function is for displaying memory sharing information.
// Returns (matrix pointer, descriptive name) pairs for the matrices owned by
// this node; ComputationNetwork::PrintMemorySharingStructure() aggregates these
// across all nodes to show which nodes share which matrices.
// TODO: customize this function for all nodes that use temp internal matrices.
virtual std::set<std::pair<const Matrix<ElemType>*, const std::wstring>> GetMatrixInfo()
{
    std::set<std::pair<const Matrix<ElemType>*, const std::wstring>> matrixInfo;
    matrixInfo.insert(make_pair(&Value(), NodeName() + L" Value" + msra::strfun::utf16(ShapeDescription())));
    matrixInfo.insert(make_pair(&Gradient(), NodeName() + L" Gradient" + msra::strfun::utf16(ShapeDescription())));
    return matrixInfo;
}
// request matrices needed to do node function value evaluation
virtual void RequestMatricesBeforeForwardProp(MatrixPool& matrixPool) override
{
@ -1961,7 +1973,9 @@ protected:
using Base::Validate; \
using Base::ValidateBinaryReduce; \
using Base::ValidateBinaryZip; \
using Base::ValidateNaryZip; \
using Base::ValidateInferBinaryInputDims; \
using Base::ValidateInferNaryInputDims; \
using Base::ValidateInferInputDimsFrom; \
using Base::ValidateUnaryMap; \
using Base::ValidateUnaryReduce; \

Просмотреть файл

@ -0,0 +1,64 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "Basics.h"
#include "ComputationNode.h"
#include "Matrix.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// SumColumnElements (input)
// Sums up all elements in each sample (column) of the input. Every sample
// will be reduced to a scalar. This is equivalent to multiplying with a row of ones.
// This is deprecated, in favor of ReduceElements().
// -----------------------------------------------------------------------
// Deprecated node that reduces every sample (column) of its single input to a
// scalar sum; equivalent to multiplying from the left with a row of ones.
// Superseded by ReduceElements().
template <class ElemType>
class SumColumnElementsNode : public ComputationNode<ElemType>, public NumInputs<1>
{
    typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName() { return L"SumColumnElements"; }

public:
    DeclareConstructorFromConfigWithNumInputs(SumColumnElementsNode);
    SumColumnElementsNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

    // Forward: sum each column of the input into a single scalar.
    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        auto inputValue = Input(0)->ValueFor(fr);
        auto outputValue = ValueFor(fr); // one row: a scalar per column
        Matrix<ElemType>::VectorSum(inputValue, outputValue, true);
    }

    // Backward: the gradient of a sum broadcasts unchanged to every summed element.
    virtual void /*ComputationNode::*/ BackpropTo(const size_t /*inputIndex*/, const FrameRange& fr) override
    {
        auto inputGrad = Input(0)->GradientFor(fr);
        auto outputGrad = GradientFor(fr);
        inputGrad += outputGrad; // outputGrad is assumed to be a row vector
    }

    // Gradient needs neither the node's output nor its input values.
    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }

    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
    {
        Base::Validate(isFinalValidationPass);
        InferMBLayoutFromInputsForStandardCase(isFinalValidationPass);
        SetDims(TensorShape(1), Input(0)->HasMBLayout()); // each column is reduced to a scalar
    }
};

template class SumColumnElementsNode<float>;
template class SumColumnElementsNode<double>;
}}}

Просмотреть файл

@ -29,12 +29,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
class PlusNode : public BinaryElementWiseNode<ElemType>
{
typedef BinaryElementWiseNode<ElemType> Base;
UsingBinaryElementwiseNodeBaseMembers;
static const std::wstring TypeName()
{
return L"Plus";
}
typedef BinaryElementWiseNode<ElemType> Base; UsingBinaryElementwiseNodeBaseMembers;
static const std::wstring TypeName() { return L"Plus"; }
public:
DeclareConstructorFromConfigWithNumInputs(PlusNode);
@ -76,12 +72,8 @@ template class PlusNode<double>;
template <class ElemType>
class LogPlusNode : public BinaryElementWiseNode<ElemType>
{
typedef BinaryElementWiseNode<ElemType> Base;
UsingBinaryElementwiseNodeBaseMembers;
static const std::wstring TypeName()
{
return L"LogPlus";
}
typedef BinaryElementWiseNode<ElemType> Base; UsingBinaryElementwiseNodeBaseMembers;
static const std::wstring TypeName() { return L"LogPlus"; }
public:
DeclareConstructorFromConfigWithNumInputs(LogPlusNode);
@ -113,6 +105,7 @@ public:
if (Input(inputIndex)->ReducesInTimeWrt(Input(1 - inputIndex)))
Input(1 - inputIndex)->MaskMissingValueColumnsToZero(fr);
// TODO: would be nice to state the derivative here in a comment
inputGradient.AddElementwiseProductWithLogSumDerivativeOf(gradient, input0, input1);
}
};
@ -497,8 +490,7 @@ template class TimesNode<double>;
// This differs from TimesNode in that A is transposed, where A must be a
// rank-1 or rank-2 tensor.
// A common use of transposition is trace(X'X) where X is a matrix of samples.
// This can NOT be implemented with this node. Instead, use
// SumColumnElements (ElementTimes (X, X))
// This can be more efficiently implemented as ReducePlus (ElementTimes (X, X))
// -----------------------------------------------------------------------
template <class ElemType>
@ -653,6 +645,9 @@ template class DiagTimesNode<double>;
// When applied to minibatch data, this will sum across all sequences in the
// minibatch, like a training-criterion node. This is one of the few operations
// that cross the boundary between input sequences.
// Note that SGD itself aggregates over samples in a criterion node.
// So the only proper use of this node is for multi-task learning, where
// different nodes have different numbers of samples (sequence length).
// -----------------------------------------------------------------------
template <class ElemType>
@ -697,63 +692,6 @@ public:
template class SumElementsNode<float>;
template class SumElementsNode<double>;
// -----------------------------------------------------------------------
// SumColumnElementsNode (input)
// Sums up all elements in each sample (column) of the input. Every sample
// will be reduced to a scalar. This is equivalent to multiplying with a row of ones.
// TODO: This should be deprecated, in favor of a reduce node.
// TODO: Implement this with the tensor library.
// axis=0: all elements; axis>0: only that axis; axis<0: time (implemented in BS)
// -----------------------------------------------------------------------
// Deprecated: sums all elements of each sample (column) of the input, reducing
// every sample to a scalar; equivalent to multiplying with a row of ones.
template <class ElemType>
class SumColumnElementsNode : public ComputationNode<ElemType>, public NumInputs<1>
{
    typedef ComputationNode<ElemType> Base;
    UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName()
    {
        return L"SumColumnElements";
    }

public:
    DeclareConstructorFromConfigWithNumInputs(SumColumnElementsNode);
    SumColumnElementsNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

    // Backward: the gradient of a column sum broadcasts unchanged to all elements of the column.
    virtual void /*ComputationNode::*/ BackpropTo(const size_t /*inputIndex*/, const FrameRange& fr) override
    {
        auto sliceInputGrad = Input(0)->GradientFor(fr);
        auto sliceOutputGrad = GradientFor(fr);

        sliceInputGrad += sliceOutputGrad; // here the assumption is that sliceOutputGrad is a row vector
    }

    // Gradient needs neither the node's output nor its input values.
    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }

    // Forward: one scalar per column, computed as a vector sum over rows.
    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        auto sliceInputValue = Input(0)->ValueFor(fr);
        auto sliceOutputValue = ValueFor(fr); // row vector
        Matrix<ElemType>::VectorSum(sliceInputValue, sliceOutputValue, true);
    }

    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
    {
        Base::Validate(isFinalValidationPass);
        InferMBLayoutFromInputsForStandardCase(isFinalValidationPass);
        SetDims(TensorShape(1), Input(0)->HasMBLayout()); // each column is reduced to a scalar
    }
};

template class SumColumnElementsNode<float>;
template class SumColumnElementsNode<double>;
// -----------------------------------------------------------------------
// TransposeDimensions (input, axis1, axis2)
// - swaps index dimensions axis1 and axis2. The values are 1-based; 1 stands for the leading dimension.

Просмотреть файл

@ -29,9 +29,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
enum GradientOperationType
{
UnaryGradient,
BinaryWithInputGradient,
BinaryWithOutputGradient
noGradient,
unaryGradient,
binaryWithInputGradient,
binaryWithOutputGradient
};
template <class ElemType, ElementWiseOperator opForward, ElementWiseOperator opBackward, GradientOperationType opType>
@ -49,26 +50,27 @@ public:
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
{
size_t rank = DetermineElementwiseTensorRank();
auto result = ValueTensorFor(rank, fr);
auto input = Input(0)->ValueTensorFor(rank, fr);
auto result = ValueTensorFor(rank, fr);
auto input = Input(0)->ValueTensorFor(rank, fr);
result.DoUnaryOpOf(0, input, 1, opForward, opSum);
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
assert(inputIndex == 0);
inputIndex;
assert(inputIndex == 0), inputIndex;
// get the args
size_t rank = DetermineElementwiseTensorRank();
auto sliceOutputGrad = GradientTensorFor(rank, fr); // propagate from this one...
auto sliceInputGrad = Input(0)->GradientTensorFor(rank, fr); // ...to this one
// we expect a constant conditional expression here -- suppress the warning that leads to an error
// TODO: alternative: assign to a non-const variable and test that.
#pragma warning( push )
#pragma warning( disable : 4127 )
if (opType == UnaryGradient)
GradientOperationType opTypeHolder = opType; // preventing pragma warning C4127
if (opTypeHolder == noGradient)
{
// Do nothing
}
else if (opTypeHolder == unaryGradient)
{
sliceInputGrad.DoUnaryOpOf(1, sliceOutputGrad, 1, opBackward, opSum);
}
@ -76,11 +78,10 @@ public:
{
// If gradient can be compute from output rather than input, then that's better for mem sharing (and faster in most cases).
// Not possible for Cos().
auto sliceValue = (opType == BinaryWithOutputGradient) ? ValueTensorFor(rank, fr) : // using input or output value
auto sliceValue = (opType == binaryWithOutputGradient) ? ValueTensorFor(rank, fr) : // using input or output value
Input(0)->ValueTensorFor(rank, fr);
sliceInputGrad.DoBinaryOpOf(1, sliceOutputGrad, sliceValue, 1, opBackward, opSum);
}
#pragma warning( pop )
}
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
@ -90,11 +91,11 @@ public:
virtual bool OutputUsedInComputingInputNodesGradients() const override
{
return opType == BinaryWithOutputGradient;
return opType == binaryWithOutputGradient;
}
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
{
return opType == BinaryWithInputGradient;
return opType == binaryWithInputGradient;
}
};
@ -107,6 +108,7 @@ public:
// RectifiedLinearNode (input)
// LogNode (input)
// ExpNode (input)
// FloorNode (input)
// CosineNode (input)
// SinNode (input)
// Abs(input)
@ -137,18 +139,19 @@ public:
}
// Name Forward and Backward opcodes Gradient optype
DeclareUnaryElementWiseWithOpCodeNode(Pass, Copy, Copy, UnaryGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sigmoid, Sigmoid, ElementwiseProductWithSigmoidDerivativeFromOutput, BinaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Tanh, Tanh, ElementwiseProductWithTanhDerivativeFromOutput, BinaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(RectifiedLinear, LinearRectifier, ElementwiseProductWithLinearRectifierDerivativeFromOutput, BinaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Log, Log, ElementwiseProductWithLogDerivativeFromOutput, BinaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Exp, Exp, ElementwiseProduct, BinaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Cosine, Cosine, ElementwiseProductWithCosDerivative, BinaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sin, Sin, ElementwiseProductWithSinDerivative, BinaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Abs, Abs, ElementwiseProductWithAbsDerivative, BinaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Negate, Negate, Negate, UnaryGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sqrt, Sqrt, ElementwiseProductWithSqrtDerivative, BinaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Reciprocal, Reciprocal, ElementwiseProductWithReciprocalDerivative, BinaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Abs, Abs, ElementwiseProductWithAbsDerivative, binaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Cosine, Cosine, ElementwiseProductWithCosDerivative, binaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Exp, Exp, ElementwiseProduct, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Floor, Floor, None, noGradient);
DeclareUnaryElementWiseWithOpCodeNode(Log, Log, ElementwiseProductWithLogDerivativeFromOutput, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Negate, Negate, Negate, unaryGradient);
DeclareUnaryElementWiseWithOpCodeNode(Pass, Copy, Copy, unaryGradient);
DeclareUnaryElementWiseWithOpCodeNode(Reciprocal, Reciprocal, ElementwiseProductWithReciprocalDerivative, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(RectifiedLinear, LinearRectifier, ElementwiseProductWithLinearRectifierDerivativeFromOutput, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sigmoid, Sigmoid, ElementwiseProductWithSigmoidDerivativeFromOutput, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sin, Sin, ElementwiseProductWithSinDerivative, binaryWithInputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Sqrt, Sqrt, ElementwiseProductWithSqrtDerivative, binaryWithOutputGradient);
DeclareUnaryElementWiseWithOpCodeNode(Tanh, Tanh, ElementwiseProductWithTanhDerivativeFromOutput, binaryWithOutputGradient);
#pragma pop_macro("DeclareUnaryElementWiseWithOpCodeNode")
@ -423,4 +426,132 @@ public:
template class HardmaxNode<float>;
template class HardmaxNode<double>;
// -----------------------------------------------------------------------
// If (flag, ifValue, elseValue)
// Elementwise ternary select, similar to C's "flag ? ifValue : elseValue":
// where the first input is != 0 the output takes the value of the second
// input, otherwise that of the third. Inputs may broadcast into each other.
// -----------------------------------------------------------------------

template <class ElemType>
class IfNode : public ComputationNode<ElemType>, public NumInputs<3>
{
    typedef ComputationNode<ElemType> Base;
    UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName()
    {
        return L"If";
    }

public:
    DeclareConstructorFromConfigWithNumInputs(IfNode);
    IfNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

    // Only the flag (input 0) is needed to route gradients; the output is not.
    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override { return childIndex == 0; }
    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        size_t rank = DetermineElementwiseTensorRank();
        auto result = ValueTensorFor(rank, fr);
        auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());
        auto input1 = Input(1)->ValueTensorFor(rank, fr.AllowBroadcast());
        auto input2 = Input(2)->ValueTensorFor(rank, fr.AllowBroadcast());
        result.AssignCondOf(input0, input1, input2);
    }

    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
    {
        if (inputIndex == 0) // derivative of the first input (the flag) is always 0 => no action.
            return;

        size_t rank = DetermineElementwiseTensorRank();
        auto gradient = GradientTensorFor(rank, fr);
        auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast());
        auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());

        // if reduction then mask the respective input(s) (zero out the gaps)
        if (Input(inputIndex)->ReducesInTimeWrt(shared_from_this()))
            MaskMissingGradientColumnsToZero(fr);

        // gradient flows only to the branch selected by the flag:
        // input 1 where flag != 0, input 2 where flag == 0
        if (inputIndex == 1)
        {
            inputGradient.AddCopyIfOf(input0, gradient);
        }
        else if (inputIndex == 2)
        {
            inputGradient.AddCopyIfNotOf(input0, gradient);
        }
    }

    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
    {
        ValidateNaryZip(isFinalValidationPass, /* allow broadcast */ true, /* num Inputs */ 3);
    }
};

template class IfNode<float>;
template class IfNode<double>;
// -----------------------------------------------------------------------
// ClipNode (minValue, maxValue, tensor)
// -----------------------------------------------------------------------
// This node clips the values in a tensor element-wise to ensure they are within minValue <= x <= maxValue
// The gradient (per element) is (ge(x, minValue) AND le(x, maxValue)), or in other words, 1 if the value has
// not been clipped, and 0 if the value has been clipped.
// Clips input 2 element-wise into the range [input 0, input 1] (with broadcasting).
// Only the clipped tensor (input 2) receives a gradient.
template <class ElemType>
class ClipNode : public ComputationNode<ElemType>, public NumInputs<3>
{
    typedef ComputationNode<ElemType> Base;
    UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName()
    {
        return L"Clip";
    }

public:
    DeclareConstructorFromConfigWithNumInputs(ClipNode);
    ClipNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        size_t rank = DetermineElementwiseTensorRank();
        auto result = ValueTensorFor(rank, fr);
        auto input0 = Input(0)->ValueTensorFor(rank, fr.AllowBroadcast()); // minValue
        auto input1 = Input(1)->ValueTensorFor(rank, fr.AllowBroadcast()); // maxValue
        auto input2 = Input(2)->ValueTensorFor(rank, fr.AllowBroadcast()); // tensor to clip
        result.AssignClipOf(input0, input1, input2);
    }

    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
    {
        // there is only a gradient for the input tensor that is to be clipped;
        // the min/max bounds (inputs 0 and 1) receive no gradient
        if (inputIndex == 2)
        {
            size_t rank = DetermineElementwiseTensorRank();
            auto gradient = GradientTensorFor(rank, fr);
            auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
            auto input = Input(inputIndex)->ValueTensorFor(rank, fr.AllowBroadcast());
            auto output = ValueTensorFor(rank, fr.AllowBroadcast());

            // gradient passes through exactly where the value was not clipped (input == output)
            inputGradient.AddCopyIfEqualOf(input, output, gradient);
        }
    }

    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
    {
        ValidateNaryZip(isFinalValidationPass, /* allow broadcast */ true, /* num Inputs */ 3);
    }
};

template class ClipNode<float>;
template class ClipNode<double>;
}}}

Просмотреть файл

@ -23,6 +23,133 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// ReduceElements (op, axis=, input)
// -----------------------------------------------------------------------
// Copies base-class state, then this node's reduction configuration.
template <class ElemType>
/*virtual*/ void ReduceElementsNode<ElemType>::CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const /*override*/
{
    Base::CopyTo(nodeP, newName, flags);
    auto node = dynamic_pointer_cast<ReduceElementsNode<ElemType>>(nodeP);
    node->m_axis = m_axis;           // axis to reduce over
    node->m_operation = m_operation; // operation name as a string (this is what gets serialized)
    node->m_op = m_op;               // opcode derived from m_operation — presumably by ValidateOp(); confirm
}
// Deserializes axis and operation name, then re-validates the operation
// (ValidateOp() — presumably also re-derives the m_op opcode; confirm).
template <class ElemType>
/*virtual*/ void ReduceElementsNode<ElemType>::Load(File& fstream, size_t modelVersion) /*override*/
{
    Base::Load(fstream, modelVersion);
    fstream >> m_axis >> m_operation;
    ValidateOp();
}
// Serializes axis and operation name.
template <class ElemType>
/*virtual*/ void ReduceElementsNode<ElemType>::Save(File& fstream) const /*override*/
{
    Base::Save(fstream);
    fstream << m_axis << m_operation; // note: we serialize the string and not the opcode, since opcodes may change
}
// Forward pass: the reduction is expressed as an elementwise Copy whose
// accumulation opcode (m_op) performs the actual reduction.
template <class ElemType>
/*virtual*/ void ReduceElementsNode<ElemType>::ForwardProp(const FrameRange& fr) /*override*/
{
    // fetch output and input as tensor views of matching rank
    size_t rank = DetermineElementwiseTensorRank();
    auto output = ValueTensorFor(rank, fr);
    auto arg = Input(0)->ValueTensorFor(rank, fr);

    output.DoUnaryOpOf(0, arg, 1, ElementWiseOperator::opCopy, m_op);
    // note: "Mean" could be implemented by passing 1/dim for alpha
}
// Backpropagates the reduction's gradient into its single input.
// For "Plus" (opSum) the gradient of a sum is 1 for every summand, so the incoming
// gradient is simply broadcast (accumulated) back over the reduced dimensions.
template <class ElemType>
/*virtual*/ void ReduceElementsNode<ElemType>::BackpropTo(const size_t inputIndex, const FrameRange& fr) /*override*/
{
assert(inputIndex == 0), inputIndex; // comma expression references inputIndex to avoid an unused-parameter warning in release builds
// get the args
size_t rank = DetermineElementwiseTensorRank();
auto sliceOutputGrad = GradientTensorFor(rank, fr); // propagate from this one...
auto sliceInputGrad = Input(0)->GradientTensorFor(rank, fr); // ...to this one
// gradients are not as simple as passing an op-code, unfortunately
switch (m_op)
{
case ElementWiseOperator::opSum:
// "Plus": broadcast the gradient
sliceInputGrad.AddCopyOf(sliceOutputGrad);
break;
// more coming
// "LogPlus": softmax
//   f(x) = log(sum_i exp x_i), hence gradient is:
//   df / dx_i = 1 / (sum_j exp x_j) * exp x_i = (Softmax(x))_i = exp(x_i - ReduceLogPlus(x))
//   targetGradient = gradientFromTop .* Exp (inputValue - outputValue)   --TODO: verify
//   i.e. compute difference of input and output, then Exp in-place. No, would need temp memory. So needs its own opcode AddScaledExpOfDiff(). Ternary.
// "Max": Copy the gradient only to the max value. targetGradient += gradientFromTop .* (outputValue == inputValue). Needs its own opcode. --TODO : verify
default:
// Fail fast for reduction ops whose gradient is not implemented yet, instead of silently
// computing no gradient. ValidateOp() currently only admits "Plus"; keep the two in sync.
LogicError("%ls: BackpropTo not implemented for reduction operation '%ls'.", NodeDescription().c_str(), m_operation.c_str());
}
}
// Returns whether BackpropTo() needs this node's output value.
// For opSum the gradient is a plain broadcast of the incoming gradient, so the output
// is not needed. Any op without a case here is a hard error (unimplemented gradient).
template <class ElemType>
/*virtual*/ bool ReduceElementsNode<ElemType>::OutputUsedInComputingInputNodesGradients() const /*override*/
{
switch (m_op)
{
case ElementWiseOperator::opSum: return false;
// will be different e.g. for LogPlus, Max, and Min
}
LogicError("Should not get here.");
}
// Returns whether BackpropTo() needs the input's value (inputIndex is ignored: there is
// only one input). For opSum it does not; any op without a case here is a hard error.
template <class ElemType>
/*virtual*/ bool ReduceElementsNode<ElemType>::InputUsedInComputingInputNodesGradients(size_t inputIndex) const /*override*/
{
switch (m_op)
{
case ElementWiseOperator::opSum: return false;
// will be different for LogPlus, Max, and Min
}
LogicError("Should not get here.");
}
// Maps the operation given as a string (m_operation, e.g. "Plus") to the internal
// ElementWiseOperator opcode (m_op); throws InvalidArgument for unknown names.
template <class ElemType>
void ReduceElementsNode<ElemType>::ValidateOp()
{
if (m_operation == L"Plus") m_op = ElementWiseOperator::opSum;
// more here
else InvalidArgument("%ls was given an invalid operation code '%ls'. Allowed are: 'Plus'. And a few more soon.", NodeDescription().c_str(), m_operation.c_str());
}
// Determines the output shape: axis 0 collapses the whole sample to a scalar;
// a positive axis k (1-based) collapses only that sample dimension to 1.
template <class ElemType>
/*virtual*/ void ReduceElementsNode<ElemType>::Validate(bool isFinalValidationPass) /*override*/
{
Base::Validate(isFinalValidationPass);
InferMBLayoutFromInputsForStandardCase(isFinalValidationPass);
// validate the opcode (in case we got instantiated empty and never updated)
ValidateOp();
let shape = Input(0)->GetSampleLayout();
auto dims = shape.GetDims();
if (m_axis == 0)
dims = { 1 }; // entire sample is reduced to a scalar
else if (m_axis - 1 >= 0 && m_axis - 1 < dims.size()) // m_axis is 1-based here; dims is 0-based
dims[m_axis - 1] = 1; // one axis is reduced to a scalar
else if (isFinalValidationPass)
InvalidArgument("The shape of %ls [%s] has no axis %d", NodeDescription().c_str(), string(shape).c_str(), m_axis);
SetDims(TensorShape(dims), Input(0)->HasMBLayout());
}
template class ReduceElementsNode<float>;
template class ReduceElementsNode<double>;
// -----------------------------------------------------------------------
// Where(bitVector) -- extract indices of non-0 values in a sequence
// -----------------------------------------------------------------------
@ -74,7 +201,7 @@ template <class ElemType>
indexSequence.push_back(t);
// Note: The above accesses m_value directly on the CPU, putting it into BOTH state, possibly for other consumers as well.
}
input.CollapseDataLocationAfterWriting(); // BUGBUG: Move back, since BOTH state is broken at present.
input.CollapseDataLocation(); // BUGBUG: Move back, since BOTH state is broken at present.
// create a new MBLayout
let& outMBLayout = GetMBLayout();
outMBLayout->InitAsPackedSequences(SequenceLengthVector(sequences, indexSequences), /*temp*/m_placementBuffer, /*temp*/m_rowAllocationsBuffer);
@ -158,7 +285,7 @@ template <class ElemType>
}
}
// Note: maybe this is no longer needed, now that we do the same inside UpdateFunctionValueSize() for all nodes.
result.CollapseDataLocationAfterWriting(); // BUGBUG: Move back, since BOTH state is broken at present.
result.CollapseDataLocation(); // BUGBUG: Move back, since BOTH state is broken at present.
}
template <class ElemType>
@ -223,12 +350,12 @@ template <class ElemType>
// inherit MBLayout from indexData
m_pMBLayout = Input(INDEXDATA)->GetMBLayout();
if (isFinalValidationPass && (!Input(INDEXDATA)->HasMBLayout()))
LogicError("%ls requires first argument (index data) to have a time dimension.", this->NodeDescription().c_str());
LogicError("%ls requires first argument (index data) to have a time dimension.", NodeDescription().c_str());
bool sourceHasTimeDimension = Input(SOURCEDATA)->HasMBLayout();
if (isFinalValidationPass && Input(INDEXDATA)->GetSampleLayout().GetNumElements() != 1)
InvalidArgument("%ls requires the first argument (index data) to be a scalar time sequence.", this->NodeDescription().c_str());
InvalidArgument("%ls requires the first argument (index data) to be a scalar time sequence.", NodeDescription().c_str());
// inherit tensor dimension from sourceData, minus the last (column or time) dimension. TODO this needs to become simpler...
if (sourceHasTimeDimension)

Просмотреть файл

@ -170,6 +170,56 @@ private:
template class ReshapeNode<float>;
template class ReshapeNode<double>;
// -----------------------------------------------------------------------
// ReduceElements (op, axis=, input)
// Reduces (e.g. sums up) all elements in each sample (column) of the input.
// The optional axis can be 0 (meaning all elements) or a specific axis.
// Allowed operations:
// - "Plus"
// - "LogPlus" --not implemented yet
// - "Mean" --not implemented yet
// - "Max" --not implemented yet
// - "Min" --not implemented yet
// - "All" --not implemented yet
// - "Any" --not implemented yet
// -----------------------------------------------------------------------
template <class ElemType>
class ReduceElementsNode : public ComputationNode<ElemType>, public NumInputs<1>
{
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"ReduceElements"; }
void ValidateOp(); // maps m_operation (string) to m_op (opcode); throws on unknown names
public:
// axis = 0 reduces the entire sample to a scalar; axis = k (1-based) reduces only that axis.
// An empty operation string defers opcode validation to Validate().
ReduceElementsNode(DEVICEID_TYPE deviceId, const wstring& name, const std::wstring& operation = std::wstring(), int axis = 0) :
Base(deviceId, name), m_operation(operation), m_axis(axis), m_op((ElementWiseOperator)-1/*invalid*/)
{
if (!m_operation.empty()) // verify validity already here out of courtesy (would otherwise be caught in Validate())
ValidateOp();
}
// BrainScript construction: reads 'reductionOp' and 'axis' from the config record.
ReduceElementsNode(const ScriptableObjects::IConfigRecordPtr configp) :
ReduceElementsNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"reductionOp"), configp->Get(L"axis"))
{
AttachInputsFromConfig(configp, this->GetExpectedNumInputs());
}
virtual void /*ComputationNodeBase::*/ CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override;
virtual void /*ComputationNodeBase::*/ Load(File& fstream, size_t modelVersion) override;
virtual void /*ComputationNodeBase::*/ Save(File& fstream) const override;
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override;
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override;
virtual bool /*ComputationNodeBase::*/ OutputUsedInComputingInputNodesGradients() const override;
virtual bool /*ComputationNodeBase::*/ InputUsedInComputingInputNodesGradients(size_t childIndex) const override;
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override;
private:
int m_axis;                // 0 = reduce whole sample; k > 0 = reduce the k-th (1-based) sample axis
std::wstring m_operation; // the operation as a string, e.g. "Plus", see GetOpcode()
ElementWiseOperator m_op; // the operation mapped to our internal opCode
};
// -----------------------------------------------------------------------
// ReconcileDynamicAxis (dataInput, layoutInput)
// This node copies data from 'dataInput' while it propagates the minibatch-layout information from 'layoutInput'.
@ -1320,10 +1370,10 @@ reshaping
reductions
----------
- ReduceSum
- these are/will be implemented as a node for samples, and as recurrences for sequences
- ReducePlus
- sum over all elements of a dimension, or over time
- we already got: SumColumnElements
- ReduceMax
- ReduceMax, ReduceMin
- max
- can use MaxPooling?
- ReduceMean
@ -1332,12 +1382,12 @@ reductions
- ArgMax, ArgMin
- we already have that somewhere, for evaluation
- All, Any
- logical test --must be done over sequences
- logical test
- TF also has:
- reduce_prod, reduce_min
- reduce_prod
- segment_sum etc.; we use sequences
- listdiff
- where: indices of 'true' values -> 2D tensor of coordinates
- where: indices of 'true' values -> 2D tensor of coordinates (unlike our Where)
- unique (1D only)
- edit_distance
- invert_permutation: invert a permutation index vector

Просмотреть файл

@ -1451,7 +1451,11 @@ public:
Matrix<ElemType> sliceInput0Value = Input(0)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
if (m_dropoutRate > 0)
if (Environment().IsInferring() || m_dropoutRate <= 0)
{
sliceOutputValue.SetValue(sliceInput0Value);
}
else
{
// determine drop-out mask for this minibatch
auto sliceMask = DataFor(*m_maskOfDropout, fr);
@ -1460,10 +1464,6 @@ public:
// apply dropout mask
sliceOutputValue.AssignElementProductOf(sliceMask, sliceInput0Value);
}
else
{
sliceOutputValue.SetValue(sliceInput0Value);
}
}
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override

Просмотреть файл

@ -100,6 +100,11 @@ void CNTKEval<ElemType>::GetNodeDimensions(std::map<std::wstring, size_t>& dimen
{
case nodeInput:
{
if (outputNodes.size() == 0)
{
LogicError("No Output nodes found: Cannot determine Input node dimensions due to lack of Output nodes.\n(are 'outputNodeNames' and/or 'OutputNodes' properly defined in the configuration file?)");
}
auto& nodes = m_net->InputNodes(outputNodes[0]);
for (auto& node : nodes)
{

Просмотреть файл

@ -3,6 +3,10 @@
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// CNTKEval.h - Include file for the CNTK Evaluation DLL
//
// NOTICE: This interface is a public interface for evaluating models in CNTK.
// Changes to this interface may affect other projects, such as Argon and LatGen,
// and therefore need to be communicated with such groups.
//
#pragma once

Просмотреть файл

@ -75,7 +75,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; ActionsLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; Common.lib; ActionsLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>Math.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
@ -99,7 +99,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; ActionsLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; Common.lib; ActionsLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib; %(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<DelayLoadDLLs>Math.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
</Link>
@ -120,7 +120,7 @@
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaLibPath)</AdditionalLibraryDirectories>
</Link>
<PostBuildEvent>
<Command>if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Command>if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)"</Command>
<Message>Copying NVidia GDK extension DLL to target folder</Message>
</PostBuildEvent>
</ItemDefinitionGroup>
@ -140,22 +140,6 @@
<ItemGroup>
<ClCompile Include="..\CNTK\BrainScript\BrainScriptEvaluator.cpp" />
<ClCompile Include="..\CNTK\BrainScript\BrainScriptParser.cpp" />
<ClCompile Include="..\Common\Config.cpp" />
<ClCompile Include="..\Common\DataReader.cpp" />
<ClCompile Include="..\Common\Eval.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\TimerUtility.cpp" />
<ClCompile Include="dllmain.cpp">
<CompileAsManaged>false</CompileAsManaged>
<PrecompiledHeader>
@ -169,4 +153,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -4135,6 +4135,111 @@ void CPUMatrix<ElemType>::ConvolutionBackwardKernel(const CPUMatrix<ElemType>& i
}
}
// Unrolls the convolution input (this matrix, one sample per column) into 'output' so that
// the whole minibatch convolution can be computed as a single GEMM (see GemmConvolutionEngine).
// mpRowCol/mpRowRun/runs are precomputed lookup tables: for each output location 'row',
// mpRowCol(row, 0) gives the base input offset, and 'runs' (indexed via mpRowRun) encodes
// a skip count, the run size, the per-tap input offsets, and a 0/1 validity mask for the taps.
// NOTE(review): table layout inferred from the indexing below — confirm against ConvolveGeometry.
// Cells of 'output' not written here are left untouched; callers zero it first (see ForwardCore).
template <class ElemType>
void CPUMatrix<ElemType>::UnrollConvolutionInput(size_t unrollCols, size_t mapOutSize, const CPUMatrix<int>& mpRowCol,
const CPUMatrix<int>& mpRowRun, const CPUMatrix<int>& runs, CPUMatrix<ElemType>& output) const
{
size_t batchSize = GetNumCols(); // one sample per column
#pragma omp parallel for
for (int64_t sample = 0; sample < (int64_t)batchSize; sample++)
{
for (size_t row = 0; row < mapOutSize; row++)
{
int colBase = mpRowCol(row, 0); // base input offset for this output location
assert(0 <= colBase && colBase < GetNumRows());
int i0 = mpRowRun(row, 0); // start of this row's run descriptor in 'runs'
int skip = runs(i0++, 0);  // number of leading kernel taps to skip
int size = runs(i0++, 0);  // number of taps in the run
int imask = i0 + size;     // the validity mask follows the offsets
for (int i = 0; i < size; i++)
{
if (runs(imask + i, 0) == 0) // masked-out tap (e.g. padding)
continue;
int dcol = runs(i0 + i, 0); // input offset of tap i relative to colBase
assert(0 <= colBase + dcol && colBase + dcol < GetNumRows());
// destination layout: one unrollCols-wide row-chunk per (row, sample) pair
output.Data()[(row * batchSize + sample) * unrollCols + skip + i] = (*this)(colBase + dcol, sample);
}
}
}
}
// Unrolls the convolution output (this matrix; here typically the source gradients, one sample
// per column) for the backward-data GEMM (see GemmConvolutionEngine::BackwardDataCore).
// Uses the same mpRowCol/mpRowRun/runs lookup tables as UnrollConvolutionInput: skip count,
// run size, per-tap offsets, then a 0/1 validity mask. mapInCount/mapOutCount are the numbers
// of input/output feature maps; rows of this matrix hold mapOutCount maps of mapOutSize each.
// Cells of 'output' not written here are left untouched; callers zero it first.
template <class ElemType>
void CPUMatrix<ElemType>::UnrollConvolutionOutput(size_t unrollCols, size_t mapInCount, size_t mapOutCount, const CPUMatrix<int>& mpRowCol,
const CPUMatrix<int>& mpRowRun, const CPUMatrix<int>& runs, CPUMatrix<ElemType>& output) const
{
assert((mpRowCol.GetNumRows() % mapOutCount) == 0);
size_t mapOutSize = mpRowCol.GetNumRows() / mapOutCount; // spatial size of one output map
size_t batchSize = GetNumCols();
size_t kernelSize = runs(1, 0); // total kernel element count, stored in the run table
assert((kernelSize % mapInCount) == 0);
size_t kernelMapSize = kernelSize / mapInCount; // kernel elements per input map
#pragma omp parallel for
for (int64_t sample = 0; sample < (int64_t)GetNumCols(); sample++)
{
for (size_t row = 0; row < mapOutSize; row++)
{
int colBase = mpRowCol(row, 0);
int i0 = mpRowRun(row, 0); // start of this row's run descriptor in 'runs'
int skip = runs(i0++, 0);
int size = runs(i0++, 0);
int imask = i0 + size; // validity mask follows the offsets
for (int i = 0; i < std::min(size, (int)kernelMapSize); i++)
{
if (runs(imask + i, 0) == 0) // masked-out tap
continue;
int dcol = runs(i0 + i, 0);
size_t isrc = row;
// destination index: grouped by input location (colBase + dcol), then sample,
// then kernel tap position within the map, with mapOutCount values per tap
size_t idst = ((colBase + dcol) * batchSize + sample) * unrollCols + ((skip + i) % kernelMapSize) * mapOutCount;
// copy this output location across all output feature maps (stride mapOutSize in rows)
for (size_t outMap = 0; outMap < mapOutCount; outMap++, isrc += mapOutSize)
{
assert(isrc < GetNumElements());
assert(idst + outMap < output.GetNumElements());
output.Data()[idst + outMap] = (*this)(isrc, sample);
}
}
}
}
}
// Unrolls the convolution input (this matrix, one sample per column) for the kernel-gradient
// GEMM (see GemmConvolutionEngine::BackwardKernelCore). Same mpRowCol/mpRowRun/runs table
// encoding as UnrollConvolutionInput, but the destination is transposed relative to it:
// rows are kernel-tap positions, columns are (output location, sample) pairs.
// Cells of 'output' not written here are left untouched; callers zero it first.
template <class ElemType>
void CPUMatrix<ElemType>::UnrollConvolutionInputForKernelBackprop(size_t mapOutSize, const CPUMatrix<int>& mpRowCol,
const CPUMatrix<int>& mpRowRun, const CPUMatrix<int>& runs, CPUMatrix<ElemType>& output) const
{
size_t batchSize = GetNumCols();
size_t unrollCols = mapOutSize * batchSize; // one column per (output location, sample) pair
#pragma omp parallel for
for (int64_t sample = 0; sample < (int64_t)batchSize; sample++)
{
for (size_t row = 0; row < mapOutSize; row++)
{
int colBase = mpRowCol(row, 0); // base input offset for this output location
assert(0 <= colBase && colBase < GetNumRows());
int i0 = mpRowRun(row, 0); // start of this row's run descriptor in 'runs'
int skip = runs(i0++, 0);
int size = runs(i0++, 0);
int imask = i0 + size; // validity mask follows the offsets
for (int i = 0; i < size; i++)
{
if (runs(imask + i, 0) == 0) // masked-out tap
continue;
int dcol = runs(i0 + i, 0);
assert(0 <= colBase + dcol && colBase + dcol < GetNumRows());
// destination index: kernel tap (skip + i) selects the row-chunk, (row, sample) the column
size_t idst = (skip + i) * unrollCols + row * batchSize + sample;
assert(idst < output.GetNumElements());
output.Data()[idst] = (*this)(colBase + dcol, sample);
}
}
}
}
template <class ElemType>
void CPUMatrix<ElemType>::MaxPoolingForward(const CPUMatrix<int>& mpRowCol, const CPUMatrix<int>& mpRowIndices, const CPUMatrix<int>& indices, CPUMatrix<ElemType>& output) const
{

Просмотреть файл

@ -348,6 +348,13 @@ public:
void ConvolutionBackwardKernel(const CPUMatrix<ElemType>& in, const CPUMatrix<int>& mpRowCol, const CPUMatrix<int>& mpRowIwht,
const CPUMatrix<int>& mpRowRun, const CPUMatrix<int>& runs, CPUMatrix<ElemType>& kernelGrad) const;
void UnrollConvolutionInput(size_t unrollCols, size_t mapOutSize, const CPUMatrix<int>& mpRowCol,
const CPUMatrix<int>& mpRowRun, const CPUMatrix<int>& runs, CPUMatrix<ElemType>& output) const;
void UnrollConvolutionOutput(size_t unrollCols, size_t mapInCount, size_t mapOutCount, const CPUMatrix<int>& mpRowCol,
const CPUMatrix<int>& mpRowRun, const CPUMatrix<int>& runs, CPUMatrix<ElemType>& output) const;
void UnrollConvolutionInputForKernelBackprop(size_t mapOutSize, const CPUMatrix<int>& mpRowCol,
const CPUMatrix<int>& mpRowRun, const CPUMatrix<int>& runs, CPUMatrix<ElemType>& output) const;
void MaxPoolingForward(const CPUMatrix<int>& mpRowCol, const CPUMatrix<int>& mpRowIndices, const CPUMatrix<int>& indices, CPUMatrix<ElemType>& output) const;
void MaxPoolingBackward(const CPUMatrix<ElemType>& out, const CPUMatrix<ElemType>& in,
const CPUMatrix<int>& mpRowCol, const CPUMatrix<int>& mpRowIndices, const CPUMatrix<int>& indices,

Просмотреть файл

@ -266,6 +266,38 @@ void CPUSparseMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& v)
}
}
// Masks out the columns of this sparse matrix for which columnsMask holds 0, by setting them
// to 'val'. Only val == 0 is supported for sparse matrices; and since a CSC column that is
// already empty IS zero, the masking itself is a no-op — in _DEBUG builds this verifies
// that every column to be masked is indeed empty, and fails otherwise.
// NOTE(review): in release builds no verification (and no work) happens at all; non-CSC
// formats are likewise only rejected (NOT_IMPLEMENTED) in _DEBUG builds.
template <class ElemType>
void CPUSparseMatrix<ElemType>::MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val)
{
VerifyWritable(__func__);

size_t n = GetNumCols();
if (n != columnsMask.GetNumCols())
RuntimeError("Matrix and column mask must have equal number of columns.");

if (val != 0)
LogicError("MaskColumnsValue is not implemented for a non-zero mask for sparse matrices.");

#ifdef _DEBUG
if (GetFormat() == MatrixFormat::matrixFormatSparseCSC)
{
// Get the binary columns mask
char* maskedCols = columnsMask.Data();

// If we're CSC, we only need to verify that the columns to be zeroed are empty.
GPUSPARSE_INDEX_TYPE* colVector = SecondaryIndexLocation();

#pragma omp parallel for
// cast avoids a signed/unsigned comparison warning (the build treats warnings as errors)
for (long j = 0; j < (long)n; j++)
if (maskedCols[j] == 0 && colVector[j + 1] != colVector[j])
LogicError("CPUSparseMatrix attempted to mask column %d, but it has %d elements in it.", (int)j, (int)(colVector[j + 1] - colVector[j]));
}
else
NOT_IMPLEMENTED;
#endif
}
template <class ElemType>
void CPUSparseMatrix<ElemType>::Print(const char* matrixName) const
{

Просмотреть файл

@ -83,6 +83,7 @@ public:
void SetValue(const size_t row, const size_t col, ElemType val);
void SetValue(const CPUSparseMatrix<ElemType>& /*val*/);
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);
size_t BufferSize() const
{

Просмотреть файл

@ -73,29 +73,30 @@ private:
enum ElementWiseOperator
{
// nullary
opConstOne,
opConstOne, opNone,
// unary (or binary with constant parameter)
opCopy,
opNegate, opNot, opAbs, opReciprocal,
opNegate, opNot, opAbs, opFloor, opReciprocal,
opSigmoid, opTanh, opSqr, opSqrt, opExp, opLog, opLinearRectifier, opCosine, opSin,
// unary ops for use by Matrix class only (there is no TensorView implementation)
opSigmoidDerivative, opLinearRectifierDerivative, opNegativeSine,
// binary
opSum, opDifference, opElementwiseProduct, opElementwiseQuotient, opLogSum,
opCopyIf, opCopyIfNot, opSum, opDifference, opElementwiseProduct, opElementwiseQuotient, opLogSum,
opMax, opMin,
opLT, opEQ, opGT, opGE, opNE, opLE, // Note: must obey this order: (sgn(a-b) == -1, 0, +1), (sgn(a-b) != -1, 0, +1)
opAnd, opOr, opXor, opMaskNegative,
opElementwiseProductWithSigmoidDerivativeFromOutput, opElementwiseProductWithTanhDerivativeFromOutput,
opElementwiseProductWithLinearRectifierDerivativeFromOutput, opElementwiseProductWithLogDerivativeFromOutput,
opElementwiseProductWithCosDerivative, opElementwiseProductWithSinDerivative,
opElementwiseProductWithAbsDerivative, opElementwiseProductWithSqrtDerivative,
opElementwiseProductWithAbsDerivative, opElementwiseProductWithSqrtDerivative,
opElementwiseProductWithReciprocalDerivative, opSqrOfDifference,
// binary ops for indexing
// opIndex,
// ternary
opCond /*a ? b : c*/,
opClip, /*clip a within interval b..c*/
opElementwiseProductWithLogSumDerivative
opElementwiseProductWithLogSumDerivative,
opCopyIfEqual
// Note: not all that's implemented in CNTK ComputationNodes has an opcode yet.
};
@ -108,6 +109,7 @@ enum ElementWiseOperator
Macro(Negate); \
Macro(Not); \
Macro(Abs); \
Macro(Floor); \
Macro(Reciprocal); \
Macro(Sigmoid); \
Macro(Tanh); \
@ -120,6 +122,8 @@ enum ElementWiseOperator
Macro(Sin);
#define ForAllBinaryOps(Macro) \
Macro(CopyIf); \
Macro(CopyIfNot); \
Macro(Sum); \
Macro(Difference); \
Macro(ElementwiseProduct); \
@ -149,10 +153,11 @@ enum ElementWiseOperator
Macro(SqrOfDifference); \
//Macro(Index);
#define ForAllTernaryOps(Macro) \
Macro(Cond); \
Macro(Clip); \
Macro(ElementwiseProductWithLogSumDerivative);
#define ForAllTernaryOps(Macro) \
Macro(Cond); \
Macro(CopyIfEqual); \
Macro(Clip); \
Macro(ElementwiseProductWithLogSumDerivative);
// -----------------------------------------------------------------------
// various enums to describe

Просмотреть файл

@ -210,13 +210,13 @@ protected:
InvalidArgument("Pooling type %d is not supported.", (int)m_poolKind);
}
private:
protected:
static bool IsGpu(DEVICEID_TYPE deviceId)
{
return deviceId >= 0;
}
private:
protected:
using IntMatPtr = std::unique_ptr<Matrix<int>>;
Matrix<int> m_mpRowCol;
@ -511,6 +511,308 @@ private:
bool m_gpuSparse1D;
};
//------------------------------------------------------------------
// GEMM convolution engine implementation.
// This engine supports arbitrary convolution configuration with full
// sharing and implemented using unroll + GEMM technique
// (High performance convolutional neural networks for document processing; Chellapilla, Puri, Simard)
// Uses reference engine for pooling operations.
//------------------------------------------------------------------
template <class ElemType>
class GemmConvolutionEngine : public ReferenceConvolutionEngine<ElemType>
{
public:
using Base = ReferenceConvolutionEngine<ElemType>;
using typename Base::Mat;
public:
// Forwards all configuration to the ReferenceConvolutionEngine base, which also
// precomputes the geometry lookup tables (m_mpRowCol, m_mpRowRun, m_runs) used here.
GemmConvolutionEngine(ConvolveGeometryPtr geometry, DEVICEID_TYPE deviceId, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples, PoolKind poolKind)
: Base(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind)
{
}
protected:
using typename Base::IntMatPtr;
using Base::IsGpu;
using Base::m_geometry;
using Base::m_deviceId;
using Base::m_imageLayout;
using Base::m_maxTempMemSizeInSamples;
using Base::m_mpRowCol;
using Base::m_mpRowIwht;
using Base::m_mpRowRun;
using Base::m_runs;
// Rejects configurations this engine cannot handle: it requires the CHW (cudnn) tensor
// layout and currently runs only on the CPU.
void EnsureCompatible() override
{
if (m_imageLayout != ImageLayoutKind::CHW)
LogicError("GEMM convolution engine supports only CHW/cudnn layout.");
if (IsGpu(m_deviceId))
LogicError("GEMM convolution engine currently supports only CPU device.");
}
// A note on notation used in the documentation for the next 3 functions:
// for simplicity we use cuDNN-style notation for 2D convolutions (though this engine supports arbitrary convolution configuration)
// where N - is the number of samples in a batch, C, H, W are number of channels, height and width of the input respectively.
// For the output we use K as the number of output feature maps and H', W' as height and width of the output.
// We also use column-major notation everywhere (as opposed to cuDNN which uses row-major) to follow CNTK rules.
// For kernels we use X, Y, Z to represent width, height and depth. This engine requires Z == C which is
// not a significant restriction as tensors of higher dimensions (+1) can be used to describe the same convolution configuration.
// Example: [WHC x N] - is a matrix of WHC rows by N columns and represents a convolution input
// where each column is a sample that has layout of WHC, so W dimension stride is 1.
//
// The forward method consists of 3 parts:
// 1. Unrolling convolution input (in) into a matrix: [WHC x N] -> [XYC x NW'H']
// Using this format allows to perform convolution for the whole minibatch as a single GEMM operation
// which is not possible with WHCN format. Alternatively, CWHN format (used in legacy engine) could be used
// but this would require both unrolling the input and transforming the weight matrix.
// 2. Performing matrix multiplication of unrolled input with weight matrix:
// [XYC x NW'H']^T * [XYC x K] -> [NW'H' x K]
// 3. Reshape and transpose result: [NW'H' x K] -> [N x W'H'K]^T -> [W'H'K x N]
// In case minibatch size == 1 this step is not required and step 2 writes results directly to output (out).
// Forward convolution via unroll + GEMM; see the notation/algorithm comment above.
// Processes the minibatch in sub-batches of up to m_maxTempMemSizeInSamples samples
// (0 = whole batch at once) to bound workspace memory.
void ForwardCore(const Mat& in, const Mat& kernel, Mat& out, Mat& workspace) override
{
size_t batchSize = in.GetNumCols();
size_t subBatchSize = m_maxTempMemSizeInSamples == 0 ? batchSize : min(batchSize, m_maxTempMemSizeInSamples);
size_t mapCount = m_geometry->GetMapCount(m_geometry->InputShape().GetRank() - 1); // K: number of output feature maps
size_t mapOutSize = m_geometry->OutputShape().GetNumElements() / mapCount;          // W'H': spatial size of one output map
size_t unrollRows = mapOutSize * subBatchSize;
size_t unrollCols = m_geometry->KernelShape().GetNumElements();                      // XYC: kernel element count
// Reserve space for unrolled inputs and, if needed, intermediate outputs.
// Intermediate outputs will be transposed to final outputs after GEMM operation.
// Transpose is not required if subBatchSize == 1.
workspace.Resize(unrollRows, unrollCols + (subBatchSize > 1 ? mapCount : 0));
for (size_t start = 0; start < batchSize; start += subBatchSize)
{
size_t curBatchSize = min(subBatchSize, batchSize - start); // last sub-batch may be smaller
auto inputSlice = in.ColumnSlice(start, curBatchSize);
auto unrolledInput = workspace.ColumnSlice(0, unrollCols);
if (curBatchSize != subBatchSize)
{
// Shrink the workspace view to the remainder sub-batch (reshape, slice, reshape back below).
unrolledInput.Reshape(mapOutSize, subBatchSize * unrollCols);
unrolledInput = unrolledInput.ColumnSlice(0, curBatchSize * unrollCols);
}
// Need to reshape (soft transpose) as matrices are column-major.
unrolledInput.Reshape(unrollCols, mapOutSize * curBatchSize);
// Unroll inputs. Zero first: UnrollConvolutionInput writes only the valid (unmasked) taps.
unrolledInput.SetValue(0);
inputSlice.UnrollConvolutionInput(unrollCols, mapOutSize, m_mpRowCol, *m_mpRowRun, *m_runs, unrolledInput);
// cudnn layout uses row-major kernel weight matrix.
auto kern = kernel.ColumnSlice(0, kernel.GetNumCols());
kern.Reshape(kernel.GetNumCols(), kernel.GetNumRows());
// Perform matrix multiplication of unrolled inputs with weights.
// If there is just one sample in the sub-batch then compute result directly to the output matrix.
if (curBatchSize == 1)
{
auto outSlice = out.ColumnSlice(start, 1);
outSlice.Reshape(mapOutSize, mapCount);
Mat::Multiply(unrolledInput, true, kern, false, outSlice);
}
else
{
// GEMM into the workspace's output area, then transpose into the final layout.
auto outTempSlice = workspace.ColumnSlice(unrollCols, mapCount);
if (curBatchSize != subBatchSize)
{
outTempSlice.Reshape(mapOutSize, subBatchSize * mapCount);
outTempSlice = outTempSlice.ColumnSlice(0, curBatchSize * mapCount);
outTempSlice.Reshape(mapOutSize * curBatchSize, mapCount);
}
Mat::Multiply(unrolledInput, true, kern, false, outTempSlice);
outTempSlice.Reshape(curBatchSize, mapOutSize * mapCount);
auto outSlice = out.ColumnSlice(start, curBatchSize);
outSlice.AssignTransposeOf(outTempSlice);
}
}
}
// The backward data method works by representing this operation as a "reverse" convolution
// in case kernel's last dimension is equal to input dimension. Gradients matrix (grad) becomes
// an output of such reverse convolution.
// There are 4 steps:
// 1. Transpose and reshape kernel weights: [XYC x K]^T -> [K x XYC] -> [KXY x C]
// 2. Unroll convolution output (here source gradients, srcGrad):
// [W'H'K' x N] -> [KXY x NWH]
// 3. Performing matrix multiplication of unrolled scrGrad with transposed weights:
// [KXY x NWH]^T * [KXY x C] -> [NWH x C]
// 4. Reshape and transpose outputs (grad): [NWH x C] -> [N x WHC]^T -> [WHC x N]
// In case minibatch size == 1 this step is not required and step 3 writes results directly to output (grad).
// Backward-data pass via reverse convolution + GEMM; see the 4-step algorithm comment above.
// Accumulates into 'grad' (MultiplyAndAdd / transpose-add-transpose), so existing gradient
// content is preserved. Requires the kernel's last dimension to equal the input's last
// dimension (checked below); processes the minibatch in sub-batches like ForwardCore.
void BackwardDataCore(const Mat& srcGrad, const Mat& kernel, Mat& grad, Mat& workspace) override
{
size_t batchSize = srcGrad.GetNumCols();
size_t subBatchSize = m_maxTempMemSizeInSamples == 0 ? batchSize : min(batchSize, m_maxTempMemSizeInSamples);
const auto& inT = m_geometry->InputShape();
const auto& kernT = m_geometry->KernelShape();
size_t dimCount = inT.GetRank();
assert(kernT[dimCount - 1] == inT[dimCount - 1]);
if (kernT[dimCount - 1] != inT[dimCount - 1])
{
RuntimeError("GEMM convolution engine does not support this convolution configuration. "
"It is possible to make GEMM engine work with this configuration by defining "
"input/output/kernel using tensors of higher(+1) dimension. Geometry: %s", ((string)*m_geometry).c_str());
}
size_t mapInCount = kernT[dimCount - 1];                 // C: number of input feature maps
size_t mapOutCount = m_geometry->GetMapCount(dimCount - 1); // K: number of output feature maps
size_t mapInSize = inT.GetNumElements() / mapInCount;    // WH: spatial size of one input map
size_t unrollRows = mapInSize * subBatchSize;
size_t unrollCols = kernel.GetNumElements() / mapInCount; // KXY
// Reserve space for:
// 1. Transposed kernel weights.
// 2. Unrolled source gradients.
// 3. Intermediate gradients (optional).
// Intermediate outputs will be transposed to final outputs after GEMM operation.
// Transpose is not required if subBatchSize == 1.
size_t kernCols = kernel.GetNumElements();
workspace.Resize(1, kernCols + unrollRows * (unrollCols + (subBatchSize > 1 ? mapInCount : 0)));
auto kern = kernel.ColumnSlice(0, kernel.GetNumCols());
// cudnn layout uses row-major kernel weight matrix.
kern.Reshape(kernel.GetNumCols(), kernel.GetNumRows());
// Now transpose and reshape to [KXY x C].
auto kernTran = workspace.ColumnSlice(0, kernCols);
// Reshape to transpose shape, AssignTransposeOf requires that.
kernTran.Reshape(kern.GetNumCols(), kern.GetNumRows());
kernTran.AssignTransposeOf(kern);
kern = kernTran.ColumnSlice(0, kernTran.GetNumCols());
// Reshape to final shape.
kern.Reshape(kernel.GetNumElements() / mapInCount, mapInCount);
for (size_t start = 0; start < batchSize; start += subBatchSize)
{
size_t curBatchSize = min(subBatchSize, batchSize - start); // last sub-batch may be smaller
auto srcGradSlice = srcGrad.ColumnSlice(start, curBatchSize);
auto unrolledSrcGrad = workspace.ColumnSlice(kernCols, unrollRows * unrollCols);
if (curBatchSize != subBatchSize)
unrolledSrcGrad = unrolledSrcGrad.ColumnSlice(0, mapInSize * curBatchSize * unrollCols);
// Need to reshape (soft transpose) as matrices are column-major.
unrolledSrcGrad.Reshape(unrollCols, mapInSize * curBatchSize);
// Unroll outputs (source gradients). Zero first: only valid taps get written.
unrolledSrcGrad.SetValue(0);
srcGradSlice.UnrollConvolutionOutput(unrollCols, mapInCount, mapOutCount, m_mpRowCol, *m_mpRowRun, *m_runs, unrolledSrcGrad);
// Perform matrix multiplication of unrolled outputs with weights.
// If there is just one sample in the sub-batch then compute result directly to the output matrix.
if (curBatchSize == 1)
{
auto gradSlice = grad.ColumnSlice(start, 1);
gradSlice.Reshape(mapInSize, mapInCount);
Mat::MultiplyAndAdd(unrolledSrcGrad, true, kern, false, gradSlice);
}
else
{
// Need to transpose existing destination gradients first so we can add new values to them.
auto gradTempSlice = workspace.ColumnSlice(kernCols + unrollRows * unrollCols, unrollRows * mapInCount);
if (curBatchSize != subBatchSize)
gradTempSlice = gradTempSlice.ColumnSlice(0, mapInSize * curBatchSize * mapInCount);
gradTempSlice.Reshape(curBatchSize, mapInSize * mapInCount);
auto gradSlice = grad.ColumnSlice(start, curBatchSize);
gradTempSlice.AssignTransposeOf(gradSlice);
gradTempSlice.Reshape(mapInSize * curBatchSize, mapInCount);
// Multiply unrolled srcGrad with weights and add to grad.
Mat::MultiplyAndAdd(unrolledSrcGrad, true, kern, false, gradTempSlice);
// Reshape and transpose grads back to original form.
gradTempSlice.Reshape(curBatchSize, mapInSize * mapInCount);
gradSlice.AssignTransposeOf(gradTempSlice);
}
}
}
// The backward kernel method consists of 3 parts:
// 1. Transpose and reshape convolution output matrix (srcGrad) into [NW'H' x K] layout.
// This step is not needed if current minibatch size == 1 and srcGrad are used instead.
// 2. Unrolling convolution input (in) into a matrix of [NW'H' x WHC] layout.
// 3. Performing matrix multiplication of unrolled input with transposed output:
// [NW'H' x WHC]^T * [NW'H' x K] -> [WHC x K] - kernel gradients.
void BackwardKernelCore(const Mat& srcGrad, const Mat& in, Mat& kernelGrad, bool /*allowReuse*/, Mat& workspace) override
{
size_t batchSize = srcGrad.GetNumCols();
size_t subBatchSize = m_maxTempMemSizeInSamples == 0 ? batchSize : min(batchSize, m_maxTempMemSizeInSamples);
const auto& inT = m_geometry->InputShape();
const auto& kernT = m_geometry->KernelShape();
const auto& outT = m_geometry->OutputShape();
size_t dimCount = inT.GetRank();
size_t mapOutCount = m_geometry->GetMapCount(dimCount - 1);
size_t mapOutSize = outT.GetNumElements() / mapOutCount;
assert(kernT[dimCount - 1] == inT[dimCount - 1]);
if (kernT[dimCount - 1] != inT[dimCount - 1])
{
RuntimeError("GEMM convolution engine does not support this convolution configuration. "
"It is possible to make GEMM engine work with this configuration by defining "
"input/output/kernel using tensors of higher(+1) dimension. Geometry: %s", ((string)*m_geometry).c_str());
}
size_t unrollRows = kernT.GetNumElements();
size_t unrollCols = mapOutSize * subBatchSize;
// Reserve space for:
// 1. Unrolled inputs.
// 2. Transposed source gradients (optional).
workspace.Resize(unrollCols, unrollRows + (subBatchSize > 1 ? mapOutCount : 0));
for (size_t start = 0; start < batchSize; start += subBatchSize)
{
size_t curBatchSize = min(subBatchSize, batchSize - start);
// 1. Transpose and reshape srcGrad.
auto srcGradSlice = srcGrad.ColumnSlice(start, curBatchSize);
if (curBatchSize > 1)
{
auto srcGradTranSlice = workspace.ColumnSlice(unrollRows, mapOutCount);
if (curBatchSize != subBatchSize)
{
srcGradTranSlice.Reshape(mapOutCount * mapOutSize, subBatchSize);
srcGradTranSlice = srcGradTranSlice.ColumnSlice(0, curBatchSize);
}
// Reshape to transposed shape - required by AssignTransposeOf.
srcGradTranSlice.Reshape(srcGradSlice.GetNumCols(), srcGradSlice.GetNumRows());
srcGradTranSlice.AssignTransposeOf(srcGradSlice);
srcGradSlice = srcGradTranSlice.ColumnSlice(0, srcGradTranSlice.GetNumCols());
}
srcGradSlice.Reshape(mapOutSize * curBatchSize, mapOutCount);
// 2. Unroll inputs.
auto inputSlice = in.ColumnSlice(start, curBatchSize);
auto unrolledInputSlice = workspace.ColumnSlice(0, unrollRows);
if (curBatchSize != subBatchSize)
{
unrolledInputSlice.Reshape(mapOutSize * unrollRows, subBatchSize);
unrolledInputSlice = unrolledInputSlice.ColumnSlice(0, curBatchSize);
}
unrolledInputSlice.Reshape(mapOutSize * curBatchSize, unrollRows);
unrolledInputSlice.SetValue(0);
inputSlice.UnrollConvolutionInputForKernelBackprop(mapOutSize, m_mpRowCol, *m_mpRowRun, *m_runs, unrolledInputSlice);
// cudnn layout uses row-major kernel weight matrix.
auto kernGrad = kernelGrad.ColumnSlice(0, kernelGrad.GetNumCols());
kernGrad.Reshape(kernelGrad.GetNumCols(), kernelGrad.GetNumRows());
// 3. Multiply.
Mat::MultiplyAndAdd(unrolledInputSlice, true, srcGradSlice, false, kernGrad);
}
}
public:
static bool IsSupported(DEVICEID_TYPE deviceId, ConvolveGeometryPtr geometry)
{
return deviceId < 0 &&
find(begin(geometry->Sharing()), end(geometry->Sharing()), false) == end(geometry->Sharing());
}
};
template <class ElemType>
std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create(ConvolveGeometryPtr geometry, DEVICEID_TYPE deviceId,
ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples, PoolKind poolKind,
@ -539,6 +841,12 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
return CuDnnConvolutionEngineFactory<ElemType>::Create(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
if (isEnabled(ConvolutionEngineKind::Gemm) && GemmConvolutionEngine<ElemType>::IsSupported(deviceId, geometry))
{
fprintf(stderr, "\nUsing GEMM convolution engine for geometry: %s.\n", engStr.c_str());
return std::make_unique<GemmConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
if (!isEnabled(ConvolutionEngineKind::Reference))
RuntimeError("Reference convolution is disabled and no other engine supports such configuratin (or disabled).");
fprintf(stderr, "\nUsing reference convolution engine for geometry: %s.\n", engStr.c_str());

Просмотреть файл

@ -18,11 +18,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
enum class ConvolutionEngineKind
{
None = 0,
Reference = 1,
CuDnn = 1 << 1,
Legacy = 1 << 2,
Reference = 1, // Reference, lookup-based implementation. Very slow but works for any convo configuration.
CuDnn = 1 << 1, // cuDNN, works only for 2D/3D convos with full sharing.
Legacy = 1 << 2, // Legacy, for backwards compatibility. REVIEW alexeyk: implement sparse version and remove Legacy altogether.
Gemm = 1 << 3, // Uses convolution unrolling+GEMM technique. Works only for convos with full sharing.
All = Reference | CuDnn | Legacy
All = Reference | CuDnn | Legacy | Gemm
};
enum class PoolKind

Просмотреть файл

@ -492,6 +492,57 @@ void GPUSparseMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& denseMatrix,
}
}
template <class ElemType>
GPUSPARSE_INDEX_TYPE* GPUSparseMatrix<ElemType>::GetCondensedVector() const
{
if (GetFormat() == MatrixFormat::matrixFormatSparseCSC || GetFormat() == MatrixFormat::matrixFormatSparseCSR)
{
PrepareDevice();
GPUSPARSE_INDEX_TYPE* pArray = new GPUSPARSE_INDEX_TYPE[SecondaryIndexCount()];
CUDA_CALL(cudaMemcpy(pArray, SecondaryIndexLocation(), sizeof(GPUSPARSE_INDEX_TYPE) * SecondaryIndexCount(), cudaMemcpyDeviceToHost));
return pArray;
}
else
{
return NULL;
}
}
template <class ElemType>
void GPUSparseMatrix<ElemType>::MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val)
{
VerifyWritable(__func__);
size_t n = GetNumCols();
if (n != columnsMask.GetNumCols())
RuntimeError("Matrix and column mask must have equal number of columns");
if (val != 0)
LogicError("MaskColumnsValue is not implmented for a non-zero mask for sparse matrices.");
#ifdef _DEBUG
if (GetFormat() == MatrixFormat::matrixFormatSparseCSC)
{
// TODO: We could do this on the GPU, but for now C++ is easier.
// Download the binary columns mask
char* maskedCols = columnsMask.CopyToArray();
// If we're CSC, we only need to verify that the columns to be zeroed are empty, since val == 0.
// So just download the condensed column vector.
GPUSPARSE_INDEX_TYPE* colVector = GetCondensedVector();
// Verify that if the column is to be masked, there are no elements in it.
#pragma omp parallel for
for (long j = 0; j < n; j++)
if (maskedCols[j] == 0 && colVector[j + 1] != colVector[j])
RuntimeError("GPUSparseMatrix attempted to mask column %d, but it has %d elements in it.", (int)j, (int)(colVector[j + 1] - colVector[j]));
}
else
NOT_IMPLEMENTED;
#endif
}
template <class ElemType>
GPUSparseMatrix<ElemType>& GPUSparseMatrix<ElemType>::operator=(const GPUSparseMatrix<ElemType>& deepCopy)
{

Просмотреть файл

@ -285,6 +285,9 @@ public:
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUMatrix<ElemType>& denseMatrix, const MatrixFormat matrixFormat);
void SetValue(const GPUMatrix<ElemType>& denseMatrix);
GPUSPARSE_INDEX_TYPE* GetCondensedVector() const;
void MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val);
void Reshape(const size_t numRows, const size_t numCols);
void ResizeAsAndCopyIndexFrom(const GPUSparseMatrix<ElemType>& a, const bool growOnly = true);

Просмотреть файл

@ -77,12 +77,12 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>libacml_mp_dll.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>libacml_mp_dll.lib;Common.lib;%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>libacml_mp_dll.dll; $(CudaDlls); %(DelayLoadDLLs)</DelayLoadDLLs>
<Profile>true</Profile>
</Link>
<PostBuildEvent>
<Command>xcopy /D /I /Y "$(ACML_PATH)\lib\*.dll" $(OutputPath)</Command>
<Command>xcopy /D /I /Y "$(ACML_PATH)\lib\*.dll" "$(OutputPath)"</Command>
<Message>Copying ACML DLLs</Message>
</PostBuildEvent>
<CudaCompile>
@ -119,12 +119,12 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>libacml_mp_dll.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>libacml_mp_dll.lib;Common.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<DelayLoadDLLs>libacml_mp_dll.dll; $(CudaDlls); %(DelayLoadDLLs)</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Command>xcopy /D /I /Y "$(ACML_PATH)\lib\*.dll" $(OutputPath)</Command>
<Command>xcopy /D /I /Y "$(ACML_PATH)\lib\*.dll" "$(OutputPath)"</Command>
<Message>Copying ACML DLLs</Message>
</PostBuildEvent>
<CudaCompile>
@ -189,13 +189,6 @@
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="BatchNormalizationEngine.cpp" />
<ClCompile Include="ConvolutionEngine.cpp" />
<ClCompile Include="CPUSparseMatrix.cpp" />
@ -221,4 +214,4 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
</Project>

Просмотреть файл

@ -16,19 +16,11 @@
<RootNamespace>Math</RootNamespace>
<ProjectName>MathCUDA</ProjectName>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<PropertyGroup>
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<Choose>
<When Condition="Exists('$(CUDNN_PATH)')">
@ -58,37 +50,28 @@
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<PropertyGroup Condition="$(DebugBuild)">
<CudaCodeGen>$(CNTK_CUDA_CODEGEN_DEBUG)</CudaCodeGen>
<CudaCodeGen Condition="'$(CudaCodeGen)'==''">compute_20,compute_20;compute_30,sm_30</CudaCodeGen>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<PropertyGroup Condition="$(ReleaseBuild)">
<CudaCodeGen>$(CNTK_CUDA_CODEGEN_RELEASE)</CudaCodeGen>
<CudaCodeGen Condition="'$(CudaCodeGen)'==''">compute_30,sm_30;compute_35,sm_35;compute_50,sm_50;</CudaCodeGen>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<PreprocessorDefinitions>NO_SYNC; WIN32; _WINDOWS; _USRDLL; MATH_EXPORTS; $(CuDnnDefine); %(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\include;$(ACML_PATH)\include;$(CudaInclude);$(CUB_PATH);$(CuDnnIncPath)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\include;$(CudaInclude);$(CUB_PATH);$(CuDnnIncPath)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<FloatingPointModel>Fast</FloatingPointModel>
<OpenMPSupport>true</OpenMPSupport>
<TreatWarningAsError>true</TreatWarningAsError>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>$(CudaLibs);libacml_mp_dll.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<AdditionalDependencies>$(CudaLibs);%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>$(CudaDlls);%(DelayLoadDLLs)</DelayLoadDLLs>
</Link>
<Lib>
<AdditionalLibraryDirectories>$(OutDir);$(ACML_PATH)\lib;$(CudaLibPath);$(CuDnnLibPath)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>$(OutDir);$(CudaLibPath);$(CuDnnLibPath)</AdditionalLibraryDirectories>
<AdditionalDependencies>$(CuDnnLib)</AdditionalDependencies>
</Lib>
<CudaCompile>
@ -102,34 +85,22 @@
<AdditionalOptions>-Xcudafe "--diag_suppress=field_without_dll_interface" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile>
<PostBuildEvent>
<Command>for %%l in ($(CudaDlls)) do if exist "$(CudaPath)\bin\%%l" xcopy /D /Y "$(CudaPath)\bin\%%l*" $(OutputPath)
if exist "$(CuDnnDll)" (xcopy /Y "$(CuDnnDll)" $(OutputPath))
<Command>for %%l in ($(CudaDlls)) do if exist "$(CudaPath)\bin\%%l" xcopy /D /Y "$(CudaPath)\bin\%%l*" "$(OutputPath)"
if exist "$(CuDnnDll)" xcopy /Y "$(CuDnnDll)" "$(OutputPath)"
</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<PreprocessorDefinitions>_DEBUG; %(PreprocessorDefinitions)</PreprocessorDefinitions>
<Optimization>Disabled</Optimization>
<MinimalRebuild>false</MinimalRebuild>
</ClCompile>
<Link>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG; %(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
<FloatingPointExceptions>false</FloatingPointExceptions>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
<CudaCompile>
<HostDebugInfo>false</HostDebugInfo>
</CudaCompile>
@ -183,7 +154,6 @@ if exist "$(CuDnnDll)" (xcopy /Y "$(CuDnnDll)" $(OutputPath))
<CudaCompile Include="GPUWatcher.cu">
<FileType>CppCode</FileType>
</CudaCompile>
<ClCompile Include="..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="cudalattice.cpp" />
<ClCompile Include="cudalib.cpp" />
<CudaCompile Include="CuDnnConvolutionEngine.cu">
@ -222,6 +192,6 @@ if exist "$(CuDnnDll)" (xcopy /Y "$(CuDnnDll)" $(OutputPath))
</ImportGroup>
<Target Name="CheckDependencies">
<Error Condition="!Exists('$(CUB_PATH)')" Text="CNTK requires the NVIDIA CUB library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#cuda for installation instructions." />
<Warning Condition="!Exists('$(CUDNN_PATH)')" Text="CNTK requires the NVIDIA cuDNN library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#cuda for installation instructions." />
<Error Condition="!Exists('$(CUDNN_PATH)')" Text="CNTK requires the NVIDIA cuDNN library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#cuda for installation instructions." />
</Target>
</Project>
</Project>

Просмотреть файл

@ -1152,12 +1152,11 @@ void Matrix<ElemType>::MaskColumnsValue(const Matrix<char>& columnsMask, ElemTyp
else if (GetDeviceId() != columnsMask.GetDeviceId() && columnsMask.GetCurrentMatrixLocation() != BOTH)
RuntimeError("MaskColumnsValue: Matrix and column mask must be on the same device.");
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val),
m_GPUMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
DISPATCH_MATRIX_ON_FLAG(this, this,
{ m_CPUMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val); },
{ m_GPUMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val); },
{ m_CPUSparseMatrix->MaskColumnsValue(*columnsMask.m_CPUMatrix, val); },
{ m_GPUSparseMatrix->MaskColumnsValue(*columnsMask.m_GPUMatrix, val); });
}
template <class ElemType>
@ -4031,6 +4030,63 @@ void Matrix<ElemType>::ConvolutionBackwardKernel(const Matrix<ElemType>& in, con
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::UnrollConvolutionInput(size_t unrollCols, size_t mapOutSize, const Matrix<int>& mpRowCol,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowRun.GetNumCols() == 1);
assert(runs.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, output);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->UnrollConvolutionInput(unrollCols, mapOutSize, *(mpRowCol.m_CPUMatrix),
*(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::UnrollConvolutionOutput(size_t unrollCols, size_t mapInCount, size_t mapOutCount, const Matrix<int>& mpRowCol,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowRun.GetNumCols() == 1);
assert(runs.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, output);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->UnrollConvolutionOutput(unrollCols, mapInCount, mapOutCount, *(mpRowCol.m_CPUMatrix),
*(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::UnrollConvolutionInputForKernelBackprop(size_t mapOutSize, const Matrix<int>& mpRowCol,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowRun.GetNumCols() == 1);
assert(runs.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, output);
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->UnrollConvolutionInputForKernelBackprop(mapOutSize, *(mpRowCol.m_CPUMatrix),
*(mpRowRun.m_CPUMatrix), *(runs.m_CPUMatrix), *(output.m_CPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::MaxPoolingForward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& output) const
{

Просмотреть файл

@ -121,7 +121,7 @@ public:
~Matrix();
// workaround to bugs in BOTH implementation: force to collapse to home location
void CollapseDataLocationAfterWriting() const
void CollapseDataLocation() const
{
SetDataLocation(GetDeviceId() < 0 ? CurrentDataLocation::CPU : CurrentDataLocation::GPU, GetMatrixType());
}
@ -479,6 +479,13 @@ public:
void ConvolutionBackwardKernel(const Matrix<ElemType>& in, const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIwht,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& kernelGrad) const;
void UnrollConvolutionInput(size_t unrollCols, size_t mapOutSize, const Matrix<int>& mpRowCol,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const;
void UnrollConvolutionOutput(size_t unrollCols, size_t mapInCount, size_t mapOutCount, const Matrix<int>& mpRowCol,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const;
void UnrollConvolutionInputForKernelBackprop(size_t mapOutSize, const Matrix<int>& mpRowCol,
const Matrix<int>& mpRowRun, const Matrix<int>& runs, Matrix<ElemType>& output) const;
void MaxPoolingForward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& output) const;
void MaxPoolingBackward(const Matrix<ElemType>& out, const Matrix<ElemType>& in,
const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices,

Просмотреть файл

@ -92,6 +92,17 @@ void GPUSparseMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& deepCo
{
}
template <class ElemType>
GPUSPARSE_INDEX_TYPE* GPUSparseMatrix<ElemType>::GetCondensedVector() const
{
return NULL;
}
template <class ElemType>
void GPUSparseMatrix<ElemType>::MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val)
{
}
template <class ElemType>
GPUSparseMatrix<ElemType>& GPUSparseMatrix<ElemType>::operator=(const GPUSparseMatrix<ElemType>& deepCopy)
{

Просмотреть файл

@ -47,6 +47,7 @@ OverloadUnaryMathFns(sqrt);
OverloadUnaryMathFns(fabs);
OverloadUnaryMathFns(cos);
OverloadUnaryMathFns(sin);
OverloadUnaryMathFns(floor);
#pragma pop_macro("OverloadUnaryMathFns")
@ -194,6 +195,7 @@ DefUnaryOp(Copy, a);
DefUnaryOp(Negate, -a);
DefUnaryOp(Not, !a);
DefUnaryOp(Abs, fabs_(a));
DefUnaryOp(Floor, floor_(a));
DefUnaryOp(Sigmoid, Sigmoid(a));
DefUnaryOp(Tanh, tanh_(a));
DefUnaryOp(Sqr, Sqr(a));
@ -214,7 +216,8 @@ DefUnaryOp(Reciprocal, a == 0 ? 0 : 1 / a);
return expr; \
}
//#define DefBinaryOp(op, expr) template<class ElemType> DECL ElemType Op ## op(const ElemType & a, ElemType b, int i = 0) { UNUSED(i); return expr; }
DefBinaryOp(CopyIf, a != 0 ? b : 0);
DefBinaryOp(CopyIfNot, a == 0 ? b : 0);
DefBinaryOp(Sum, a + b);
DefBinaryOp(Difference, a - b);
DefBinaryOp(ElementwiseProduct, a* b);
@ -255,7 +258,8 @@ DefBinaryOp(SqrOfDifference, Sqr(a - b));
}
DefTernaryOp(Cond, a ? b : c);
DefTernaryOp(Clip, a < b ? b : (a > c ? c : a));
DefTernaryOp(CopyIfEqual, a == b ? c : 0); // CopyIfEqual(a,b)(c) -- if a==b copy c, otherwise 0; used for gradient of clip, min, max, etc.
DefTernaryOp(Clip, c < a ? a : (c > b ? b : c)); // Clip(min,max)(data) => a=min, b=max, c=data
DefTernaryOp(ElementwiseProductWithLogSumDerivative, a * Sigmoid(c - b));
#pragma pop_macro("DefTernaryOp")

Просмотреть файл

@ -71,7 +71,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
@ -92,7 +92,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
@ -106,21 +106,9 @@
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\DataWriter.cpp" />
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="BinaryFile.cpp" />
<ClCompile Include="BinaryReader.cpp" />
<ClCompile Include="BinaryWriter.cpp" />
<ClCompile Include="..\..\Common\Config.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="Exports.cpp" />
<ClCompile Include="dllmain.cpp">
<CompileAsManaged>false</CompileAsManaged>
@ -134,4 +122,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -47,6 +47,12 @@ CNTKTextFormatReader::CNTKTextFormatReader(MemoryProviderPtr provider,
randomizer->Initialize(nullptr, config);
m_transformer = randomizer;
// TODO: add "frameMode" config paramter
m_packer = std::make_shared<SequencePacker>(
m_provider,
m_transformer,
GetStreamDescriptions());
}
catch (const std::runtime_error& e)
{
@ -61,18 +67,13 @@ std::vector<StreamDescriptionPtr> CNTKTextFormatReader::GetStreamDescriptions()
void CNTKTextFormatReader::StartEpoch(const EpochConfiguration& config)
{
if (config.m_totalEpochSizeInSamples <= 0)
if (config.m_totalEpochSizeInSamples == 0)
{
RuntimeError("Unsupported minibatch size '%d'.", (int)config.m_totalEpochSizeInSamples);
RuntimeError("Epoch size cannot be 0.");
}
m_transformer->StartEpoch(config);
// TODO: add "frameMode" config paramter
m_packer = std::make_shared<SequencePacker>(
m_provider,
m_transformer,
config.m_minibatchSizeInSamples,
GetStreamDescriptions());
m_packer->StartEpoch(config);
}
Minibatch CNTKTextFormatReader::ReadMinibatch()

Просмотреть файл

@ -62,7 +62,7 @@
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>ReaderLib.lib;Math.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ReaderLib.lib;Math.lib;Common.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
@ -100,17 +100,6 @@
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\Config.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="Indexer.cpp" />
<ClCompile Include="TextConfigHelper.cpp" />
<ClCompile Include="TextParser.cpp" />

Просмотреть файл

@ -219,8 +219,6 @@ void TextParser<ElemType>::TextDataChunk::GetSequence(size_t sequenceId, std::ve
{
auto it = m_sequencePtrMap.find(sequenceId);
assert(it != m_sequencePtrMap.end());
//TODO: Remove pragma once new randomizer is in master.
#pragma omp atomic
++m_sequenceRequestCount;
result.reserve(it->second.size());
copy(it->second.begin(), it->second.end(), back_inserter(result));
@ -230,52 +228,47 @@ template <class ElemType>
ChunkPtr TextParser<ElemType>::GetChunk(size_t chunkId)
{
ChunkPtr chunk;
//TODO: Remove pragma once new randomizer is in master.
#pragma omp critical
auto it = m_chunkCache.find(chunkId);
if (it != m_chunkCache.end())
{
auto it = m_chunkCache.find(chunkId);
if (it != m_chunkCache.end())
{
chunk = it->second;
}
else
{
const auto& chunkDescriptor = m_indexer->GetIndex()[chunkId];
auto textChunk = make_shared<TextDataChunk>(chunkDescriptor);
chunk = it->second;
}
else
{
const auto& chunkDescriptor = m_indexer->GetIndex()[chunkId];
auto textChunk = make_shared<TextDataChunk>(chunkDescriptor);
attempt(5, [this, &textChunk, &chunkDescriptor]()
{
LoadChunk(textChunk, chunkDescriptor);
});
attempt(5, [this, &textChunk, &chunkDescriptor]()
{
LoadChunk(textChunk, chunkDescriptor);
});
if (m_chunkCacheSize > 0 && m_chunkCache.size() == m_chunkCacheSize)
if (m_chunkCacheSize > 0 && m_chunkCache.size() == m_chunkCacheSize)
{
size_t candidateId = SIZE_MAX;
size_t minNumSequencesLeft = SIZE_MAX;
for (const auto& it : m_chunkCache)
{
size_t candidateId = SIZE_MAX;
size_t minNumSequencesLeft = SIZE_MAX;
for (const auto& it : m_chunkCache)
const auto& chunk = *(it.second.get());
size_t numSequencesUsed = 0;
numSequencesUsed += chunk.m_sequenceRequestCount;
size_t numSequencesLeft = chunk.m_sequences.size() - numSequencesUsed;
if (numSequencesLeft < minNumSequencesLeft)
{
const auto& chunk = *(it.second.get());
size_t numSequencesUsed = 0;
#pragma omp atomic
numSequencesUsed += chunk.m_sequenceRequestCount;
size_t numSequencesLeft = chunk.m_sequences.size() - numSequencesUsed;
if (numSequencesLeft < minNumSequencesLeft)
{
minNumSequencesLeft = numSequencesLeft;
candidateId = it.first;
}
minNumSequencesLeft = numSequencesLeft;
candidateId = it.first;
}
assert(candidateId != SIZE_MAX);
m_chunkCache.erase(candidateId);
}
if (m_chunkCacheSize > 0)
{
m_chunkCache[chunkId] = textChunk;
}
chunk = textChunk;
assert(candidateId != SIZE_MAX);
m_chunkCache.erase(candidateId);
}
if (m_chunkCacheSize > 0)
{
m_chunkCache[chunkId] = textChunk;
}
chunk = textChunk;
}
return chunk;
}

Просмотреть файл

@ -69,7 +69,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
@ -90,7 +90,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
@ -131,4 +131,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -54,7 +54,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>ReaderLib.lib;Math.lib;kernel32.lib;user32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ReaderLib.lib;Common.lib;Math.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
@ -96,27 +96,12 @@
<ClInclude Include="UtteranceDescription.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\Config.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\TimerUtility.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="ConfigHelper.cpp" />
<ClCompile Include="Exports.cpp" />
<ClCompile Include="dllmain.cpp">
<CompileAsManaged>false</CompileAsManaged>
<PrecompiledHeader />
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp">
<PrecompiledHeader />
</ClCompile>
<ClCompile Include="HTKDataDeserializer.cpp" />
<ClCompile Include="HTKMLFReader.cpp" />
<ClCompile Include="MLFDataDeserializer.cpp" />
@ -127,4 +112,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -129,7 +129,8 @@ public:
}
catch (...)
{
ReleaseData();
// Releasing all data
m_frames.resize(0, 0);
throw;
}
}

Просмотреть файл

@ -210,7 +210,8 @@ void HTKDataDeserializer::GetSequencesForChunk(size_t chunkId, vector<SequenceDe
for (size_t i = 0; i < chunk.GetNumberOfUtterances(); ++i)
{
auto utterance = chunk.GetUtterance(i);
size_t major = utterance->GetId();
// Currently we do not support common prefix, so simply assign the minor to the key.
size_t sequence = utterance->GetId();
if (m_frameMode)
{
@ -219,8 +220,8 @@ void HTKDataDeserializer::GetSequencesForChunk(size_t chunkId, vector<SequenceDe
{
SequenceDescription f;
f.m_chunkId = chunkId;
f.m_key.m_major = major;
f.m_key.m_minor = k;
f.m_key.m_sequence = sequence;
f.m_key.m_sample = k;
f.m_id = offsetInChunk++;
f.m_isValid = true;
f.m_numberOfSamples = 1;
@ -232,8 +233,8 @@ void HTKDataDeserializer::GetSequencesForChunk(size_t chunkId, vector<SequenceDe
// Creating sequence description per utterance.
SequenceDescription f;
f.m_chunkId = chunkId;
f.m_key.m_major = major;
f.m_key.m_minor = 0;
f.m_key.m_sequence = sequence;
f.m_key.m_sample = 0;
f.m_id = offsetInChunk++;
f.m_isValid = true;
f.m_numberOfSamples = utterance->GetNumberOfFrames();
@ -432,7 +433,7 @@ static SequenceDescription s_InvalidSequence{0, 0, 0, false};
void HTKDataDeserializer::GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& d)
{
assert(!m_primary);
auto iter = m_keyToChunkLocation.find(key.m_major);
auto iter = m_keyToChunkLocation.find(key.m_sequence);
if (iter == m_keyToChunkLocation.end())
{
// Unknown sequence. Return invalid.
@ -443,7 +444,7 @@ void HTKDataDeserializer::GetSequenceDescriptionByKey(const KeyType& key, Sequen
const auto& chunk = m_chunks[iter->second.first];
const auto& sequence = chunk.GetUtterance(iter->second.second);
d.m_chunkId = sequence->GetChunkId();
d.m_id = m_frameMode ? sequence->GetStartFrameIndexInsideChunk() + key.m_minor : sequence->GetIndexInsideChunk();
d.m_id = m_frameMode ? sequence->GetStartFrameIndexInsideChunk() + key.m_sample : sequence->GetIndexInsideChunk();
d.m_isValid = true;
d.m_numberOfSamples = m_frameMode ? 1 : sequence->GetNumberOfFrames();
}

Просмотреть файл

@ -13,7 +13,7 @@
#include "StringUtil.h"
#include "FramePacker.h"
#include "SequencePacker.h"
#include "BpttPacker.h"
#include "TruncatedBpttPacker.h"
#include "BlockRandomizer.h"
#include "NoRandomizer.h"
@ -136,22 +136,6 @@ HTKMLFReader::HTKMLFReader(MemoryProviderPtr provider,
m_streams.push_back(stream);
}
}
}
std::vector<StreamDescriptionPtr> HTKMLFReader::GetStreamDescriptions()
{
assert(!m_streams.empty());
return m_streams;
}
void HTKMLFReader::StartEpoch(const EpochConfiguration& config)
{
if (config.m_totalEpochSizeInSamples <= 0)
{
RuntimeError("Unsupported minibatch size '%d'.", (int)config.m_totalEpochSizeInSamples);
}
m_randomizer->StartEpoch(config);
// TODO: should we unify sample and sequence mode packers into a single one.
// TODO: functionally they are the same, the only difference is how we handle
@ -164,20 +148,35 @@ void HTKMLFReader::StartEpoch(const EpochConfiguration& config)
switch (m_packingMode)
{
case PackingMode::sample:
m_packer = std::make_shared<FramePacker>(
m_provider,
m_randomizer,
config.m_minibatchSizeInSamples,
m_streams);
m_packer = std::make_shared<FramePacker>(m_provider, m_randomizer, m_streams);
break;
case PackingMode::sequence:
m_packer = std::make_shared<SequencePacker>(
m_provider,
m_randomizer,
config.m_minibatchSizeInSamples,
m_streams);
m_packer = std::make_shared<SequencePacker>(m_provider, m_randomizer, m_streams);
break;
case PackingMode::truncated:
m_packer = std::make_shared<TruncatedBPTTPacker>(m_provider, m_randomizer, m_streams);
break;
default:
LogicError("Unsupported type of packer '%d'.", (int)m_packingMode);
}
}
std::vector<StreamDescriptionPtr> HTKMLFReader::GetStreamDescriptions()
{
assert(!m_streams.empty());
return m_streams;
}
void HTKMLFReader::StartEpoch(const EpochConfiguration& config)
{
if (config.m_totalEpochSizeInSamples == 0)
{
RuntimeError("Epoch size cannot be 0.");
}
if (m_packingMode == PackingMode::truncated)
{
size_t minibatchSize = config.m_minibatchSizeInSamples;
size_t truncationLength = m_truncationLength;
@ -191,17 +190,22 @@ void HTKMLFReader::StartEpoch(const EpochConfiguration& config)
size_t numParallelSequences = m_numParallelSequencesForAllEpochs[config.m_epochIndex];
minibatchSize = numParallelSequences * truncationLength;
}
EpochConfiguration bpttConfig;
bpttConfig.m_numberOfWorkers = config.m_numberOfWorkers;
bpttConfig.m_workerRank = config.m_workerRank;
bpttConfig.m_totalEpochSizeInSamples = config.m_totalEpochSizeInSamples;
bpttConfig.m_epochIndex = config.m_epochIndex;
bpttConfig.m_minibatchSizeInSamples = minibatchSize;
bpttConfig.m_truncationSize = truncationLength;
m_packer = std::make_shared<BpttPacker>(
m_provider,
m_randomizer,
minibatchSize,
truncationLength,
m_streams);
break;
m_randomizer->StartEpoch(bpttConfig);
m_packer->StartEpoch(bpttConfig);
}
default:
LogicError("Unsupported type of packer '%d'.", (int)m_packingMode);
else
{
m_randomizer->StartEpoch(config);
m_packer->StartEpoch(config);
}
}

Просмотреть файл

@ -94,7 +94,7 @@ MLFDataDeserializer::MLFDataDeserializer(CorpusDescriptorPtr corpus, const Confi
if (!stringRegistry.TryGet(l.first, id))
continue;
description.m_key.m_major = id;
description.m_key.m_sequence = id;
const auto& utterance = l.second;
description.m_sequenceStart = m_classIds.size();
@ -130,18 +130,18 @@ MLFDataDeserializer::MLFDataDeserializer(CorpusDescriptorPtr corpus, const Confi
description.m_numberOfSamples = numberOfFrames;
totalFrames += numberOfFrames;
m_utteranceIndex.push_back(m_frames.size());
m_keyToSequence[description.m_key.m_major] = m_utteranceIndex.size() - 1;
m_keyToSequence[description.m_key.m_sequence] = m_utteranceIndex.size() - 1;
// TODO: Should be created by chunks only.
MLFFrame f;
f.m_chunkId = 0;
f.m_numberOfSamples = 1;
f.m_key.m_major = description.m_key.m_major;
f.m_key.m_sequence = description.m_key.m_sequence;
f.m_isValid = description.m_isValid;
for (size_t k = 0; k < description.m_numberOfSamples; ++k)
{
f.m_id = m_frames.size();
f.m_key.m_minor = k;
f.m_key.m_sample = k;
f.m_index = description.m_sequenceStart + k;
m_frames.push_back(f);
}
@ -208,8 +208,8 @@ void MLFDataDeserializer::GetSequencesForChunk(size_t, std::vector<SequenceDescr
for (size_t i = 0; i < m_frames.size(); ++i)
{
SequenceDescription f;
f.m_key.m_major = m_frames[i].m_key.m_major;
f.m_key.m_minor = m_frames[i].m_key.m_minor;
f.m_key.m_sequence = m_frames[i].m_key.m_sequence;
f.m_key.m_sample = m_frames[i].m_key.m_sample;
f.m_id = m_frames[i].m_id;
f.m_chunkId = m_frames[i].m_chunkId;
f.m_numberOfSamples = 1;
@ -223,8 +223,8 @@ void MLFDataDeserializer::GetSequencesForChunk(size_t, std::vector<SequenceDescr
for (size_t i = 0; i < m_utteranceIndex.size() - 1; ++i)
{
SequenceDescription f;
f.m_key.m_major = m_frames[m_utteranceIndex[i]].m_key.m_major;
f.m_key.m_minor = 0;
f.m_key.m_sequence = m_frames[m_utteranceIndex[i]].m_key.m_sequence;
f.m_key.m_sample = 0;
f.m_id = i;
f.m_chunkId = m_frames[m_utteranceIndex[i]].m_chunkId;
f.m_numberOfSamples = m_utteranceIndex[i + 1] - m_utteranceIndex[i];
@ -305,7 +305,7 @@ static SequenceDescription s_InvalidSequence { 0, 0, 0, false };
void MLFDataDeserializer::GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& result)
{
auto sequenceId = m_keyToSequence.find(key.m_major);
auto sequenceId = m_keyToSequence.find(key.m_sequence);
if (sequenceId == m_keyToSequence.end())
{
result = s_InvalidSequence;
@ -314,13 +314,13 @@ void MLFDataDeserializer::GetSequenceDescriptionByKey(const KeyType& key, Sequen
if (m_frameMode)
{
size_t index = m_utteranceIndex[sequenceId->second] + key.m_minor;
size_t index = m_utteranceIndex[sequenceId->second] + key.m_sample;
result = m_frames[index];
}
else
{
result.m_key.m_major = key.m_major;
result.m_key.m_minor = 0;
result.m_key.m_sequence = key.m_sequence;
result.m_key.m_sample = 0;
result.m_id = sequenceId->second;
result.m_chunkId = m_frames[m_utteranceIndex[sequenceId->second]].m_chunkId;
result.m_numberOfSamples = m_utteranceIndex[sequenceId->second + 1] - m_utteranceIndex[sequenceId->second];

Просмотреть файл

@ -70,7 +70,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
</Link>
@ -94,7 +94,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
@ -123,11 +123,6 @@
<ClInclude Include="utterancesourcemulti.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\..\Common\TimerUtility.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="Exports.cpp" />
<ClCompile Include="DataWriterLocal.cpp" />
<ClCompile Include="dllmain.cpp">
@ -135,9 +130,6 @@
<PrecompiledHeader>
</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="HTKMLFReader.cpp" />
<ClCompile Include="HTKMLFWriter.cpp" />
<ClCompile Include="latticearchive.cpp" />
@ -148,4 +140,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -153,9 +153,9 @@ class minibatchutterancesourcemulti : public minibatchsource
try // this function supports retrying since we read from the unrealible network, i.e. do not return in a broken state
{
msra::asr::htkfeatreader reader; // feature reader (we reinstantiate it for each block, i.e. we reopen the file actually)
auto_timer* p_pageintimer = nullptr;
std::unique_ptr<auto_timer> pageintimer = nullptr;
if (verbosity > 2)
p_pageintimer = new auto_timer();
pageintimer.reset(new auto_timer());
// if this is the first feature read ever, we explicitly open the first file to get the information such as feature dimension
if (featdim == 0)
{
@ -181,21 +181,29 @@ class minibatchutterancesourcemulti : public minibatchsource
fprintf(stderr, "requiredata: %d utterances read\n", (int)utteranceset.size());
if (verbosity > 2)
{
if (p_pageintimer != nullptr)
if (pageintimer != nullptr)
{
double pageintime = (double)(*p_pageintimer);
fprintf(stderr, "Chunk read statistics; Total time = %.8g, Num Frames read = %zu, Num bytes per frame = %zu, Avg I/O bandwidth = %.2g MB/sec).\n",
pageintime, totalframes, featdim * sizeof(float), (double)(featdim * sizeof(float) * totalframes / 1024 / 1024 / pageintime));
double pageintime = (double)(*pageintimer);
#ifdef _MSC_VER
fprintf(stderr, "Chunk read statistics; Total time = %.8g, Num Frames read = %Iu, Num bytes per frame = %Iu, Avg I/O bandwidth = %.2g MB/sec).\n",
pageintime, totalframes, featdim * sizeof(float), (double)(featdim * sizeof(float) * totalframes / 1024 / 1024 / pageintime));
#else
fprintf(stderr, "Chunk read statistics; Total time = %.8g, Num Frames read = %zu, Num bytes per frame = %zu, Avg I/O bandwidth = %.2g MB/sec).\n",
pageintime, totalframes, featdim * sizeof(float), (double)(featdim * sizeof(float) * totalframes / 1024 / 1024 / pageintime));
#endif
}
}
}
}
catch (...)
{
releasedata();
// Clean up in a non-throwable way in order not to hide the original exception.
cleandata();
throw;
}
}
// page out data for this chunk
void releasedata() const
{
@ -203,6 +211,12 @@ class minibatchutterancesourcemulti : public minibatchsource
LogicError("releasedata: cannot page out virgin block");
if (!isinram())
LogicError("releasedata: called when data is not memory");
cleandata();
}
private:
void cleandata() const
{
// release frames
frames.resize(0, 0);
// release lattice data

Просмотреть файл

@ -22,7 +22,7 @@ public:
virtual ~ByteReader() = default;
virtual void Register(size_t seqId, const std::string& path) = 0;
virtual cv::Mat Read(size_t seqId, const std::string& path) = 0;
virtual cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) = 0;
DISABLE_COPY_AND_MOVE(ByteReader);
};
@ -31,7 +31,7 @@ class FileByteReader : public ByteReader
{
public:
void Register(size_t, const std::string&) override {}
cv::Mat Read(size_t seqId, const std::string& path) override;
cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) override;
};
#ifdef USE_ZIP
@ -41,7 +41,7 @@ public:
ZipByteReader(const std::string& zipPath);
void Register(size_t seqId, const std::string& path) override;
cv::Mat Read(size_t seqId, const std::string& path) override;
cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) override;
private:
using ZipPtr = std::unique_ptr<zip_t, void(*)(zip_t*)>;

Просмотреть файл

@ -77,6 +77,7 @@ ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config)
m_mapPath = config(L"file");
m_grayscale = config(L"grayscale", false);
std::string rand = config(L"randomize", "auto");
if (AreEqualIgnoreCase(rand, "auto"))

Просмотреть файл

@ -46,7 +46,12 @@ public:
return m_randomize;
}
bool IsMultiViewCrop() const
bool UseGrayscale() const
{
return m_grayscale;
}
bool IsMultiViewCrop() const
{
return m_multiViewCrop;
}
@ -61,6 +66,7 @@ private:
int m_cpuThreadCount;
bool m_randomize;
bool m_multiViewCrop;
bool m_grayscale;
};
typedef std::shared_ptr<ImageConfigHelper> ImageConfigHelperPtr;

Просмотреть файл

@ -77,7 +77,7 @@ public:
const auto& imageSequence = m_description;
auto image = std::make_shared<DeserializedImage>();
image->m_image = std::move(m_parent.ReadImage(m_description.m_id, imageSequence.m_path));
image->m_image = std::move(m_parent.ReadImage(m_description.m_id, imageSequence.m_path, m_parent.m_grayscale));
auto& cvImage = image->m_image;
if (!cvImage.data)
@ -119,7 +119,8 @@ ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)
ImageConfigHelper configHelper(config);
m_streams = configHelper.GetStreams();
assert(m_streams.size() == 2);
const auto& label = m_streams[configHelper.GetLabelStreamId()];
m_grayscale = configHelper.UseGrayscale();
const auto& label = m_streams[configHelper.GetLabelStreamId()];
const auto& feature = m_streams[configHelper.GetFeatureStreamId()];
// Expect data in HWC.
@ -214,8 +215,8 @@ void ImageDataDeserializer::CreateSequenceDescriptions(std::string mapPath, size
description.m_chunkId = curId;
description.m_path = imagePath;
description.m_classId = cid;
description.m_key.m_major = description.m_id;
description.m_key.m_minor = 0;
description.m_key.m_sequence = description.m_id;
description.m_key.m_sample = 0;
m_imageSequences.push_back(description);
RegisterByteReader(description.m_id, description.m_path, knownReaders);
@ -266,20 +267,23 @@ void ImageDataDeserializer::RegisterByteReader(size_t seqId, const std::string&
#endif
}
cv::Mat ImageDataDeserializer::ReadImage(size_t seqId, const std::string& path)
cv::Mat ImageDataDeserializer::ReadImage(size_t seqId, const std::string& path, bool grayscale)
{
assert(!path.empty());
ImageDataDeserializer::SeqReaderMap::const_iterator r;
if (m_readers.empty() || (r = m_readers.find(seqId)) == m_readers.end())
return m_defaultReader.Read(seqId, path);
return (*r).second->Read(seqId, path);
return m_defaultReader.Read(seqId, path, grayscale);
return (*r).second->Read(seqId, path, grayscale);
}
cv::Mat FileByteReader::Read(size_t, const std::string& path)
cv::Mat FileByteReader::Read(size_t, const std::string& path, bool grayscale)
{
assert(!path.empty());
assert(!path.empty());
return cv::imread(path, cv::IMREAD_COLOR);
if (grayscale)
return cv::imread(path, cv::IMREAD_GRAYSCALE);
else
return cv::imread(path, cv::IMREAD_COLOR);
}
}}}

Просмотреть файл

@ -56,10 +56,13 @@ private:
// Element type of the feature/label stream (currently float/double only).
ElementType m_featureElementType;
// whether images shall be loaded in grayscale
bool m_grayscale;
// Not using nocase_compare here as it's not correct on Linux.
using PathReaderMap = std::unordered_map<std::string, std::shared_ptr<ByteReader>>;
void RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders);
cv::Mat ReadImage(size_t seqId, const std::string& path);
cv::Mat ReadImage(size_t seqId, const std::string& path, bool grayscale);
// REVIEW alexeyk: can potentially use vector instead of map. Need to handle default reader and resizing though.
using SeqReaderMap = std::unordered_map<size_t, std::shared_ptr<ByteReader>>;

Просмотреть файл

@ -38,13 +38,17 @@ ImageReader::ImageReader(MemoryProviderPtr provider,
auto deserializer = std::make_shared<ImageDataDeserializer>(config);
TransformerPtr randomizer;
// Request multi-threaded randomizer operation to speed up CPU-intensive image-decoding and transformations.
const bool multithreadedGetNextSequences = true;
if (configHelper.ShouldRandomize())
{
randomizer = std::make_shared<BlockRandomizer>(0, 1, deserializer, BlockRandomizer::DecimationMode::sequence, false);
// We do not use legacy randomization.
bool useLegacyRandomization = false;
randomizer = std::make_shared<BlockRandomizer>(0, 1, deserializer, BlockRandomizer::DecimationMode::sequence, useLegacyRandomization, multithreadedGetNextSequences);
}
else
{
randomizer = std::make_shared<NoRandomizer>(deserializer);
randomizer = std::make_shared<NoRandomizer>(deserializer, multithreadedGetNextSequences);
}
randomizer->Initialize(nullptr, config);
@ -66,6 +70,11 @@ ImageReader::ImageReader(MemoryProviderPtr provider,
}
m_transformer = last;
m_packer = std::make_shared<FramePacker>(
m_provider,
m_transformer,
m_streams);
}
std::vector<StreamDescriptionPtr> ImageReader::GetStreamDescriptions()
@ -76,17 +85,13 @@ std::vector<StreamDescriptionPtr> ImageReader::GetStreamDescriptions()
void ImageReader::StartEpoch(const EpochConfiguration& config)
{
if (config.m_totalEpochSizeInSamples <= 0)
if (config.m_totalEpochSizeInSamples == 0)
{
RuntimeError("Unsupported minibatch size '%u'.", (int)config.m_totalEpochSizeInSamples);
RuntimeError("Epoch size cannot be 0.");
}
m_transformer->StartEpoch(config);
m_packer = std::make_shared<FramePacker>(
m_provider,
m_transformer,
config.m_minibatchSizeInSamples,
m_streams);
m_packer->StartEpoch(config);
}
Minibatch ImageReader::ReadMinibatch()

Просмотреть файл

@ -75,7 +75,7 @@
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>ReaderLib.lib;Math.lib;$(OpenCVLib);$(ZipLibs);%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ReaderLib.lib;Common.lib;Math.lib;$(OpenCVLib);$(ZipLibs);%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<Command>if "$(HasOpenCv)" == "true" xcopy /I /D /Y "$(OPENCV_PATH)\x64\vc12\bin\opencv_world300.dll" "$(TargetDir)"
@ -126,18 +126,6 @@ if "$(UseZip)" == "true" if exist "$(ZLIB_PATH)\bin\zlib1.dll" (xcopy /I /D /Y "
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\Config.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="$(ReleaseBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="ImageConfigHelper.cpp" />
<ClCompile Include="ImageDataDeserializer.cpp" />
<ClCompile Include="dllmain.cpp" />

Просмотреть файл

@ -56,7 +56,7 @@ void ZipByteReader::Register(size_t seqId, const std::string& path)
m_zips.push(std::move(zipFile));
}
cv::Mat ZipByteReader::Read(size_t seqId, const std::string& path)
cv::Mat ZipByteReader::Read(size_t seqId, const std::string& path, bool grayscale)
{
// Find index of the file in .zip file.
auto r = m_seqIdToIndex.find(seqId);
@ -99,11 +99,15 @@ cv::Mat ZipByteReader::Read(size_t seqId, const std::string& path)
}
m_zips.push(std::move(zipFile));
cv::Mat img = cv::imdecode(cv::Mat(1, (int)size, CV_8UC1, contents.data()), cv::IMREAD_COLOR);
cv::Mat img;
if (grayscale)
img = cv::imdecode(cv::Mat(1, (int)size, CV_8UC1, contents.data()), cv::IMREAD_GRAYSCALE);
else
img = cv::imdecode(cv::Mat(1, (int)size, CV_8UC1, contents.data()), cv::IMREAD_COLOR);
assert(nullptr != img.data);
m_workspace.push(std::move(contents));
return img;
}
}}}
#endif
#endif

Просмотреть файл

@ -69,7 +69,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
@ -90,7 +90,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
@ -107,18 +107,6 @@
<ClInclude Include="SequenceParser.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\DataWriter.cpp" />
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\Config.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="Exports.cpp" />
<ClCompile Include="dllmain.cpp">
<CompileAsManaged Condition="$(DebugBuild)">false</CompileAsManaged>
@ -142,4 +130,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -72,7 +72,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
@ -93,7 +93,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
@ -109,17 +109,6 @@
<ClInclude Include="LUSequenceParser.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\Config.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="DataWriterLocal.cpp" />
<ClCompile Include="Exports.cpp" />
<ClCompile Include="dllmain.cpp">
@ -143,4 +132,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -69,7 +69,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
@ -90,7 +90,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
@ -105,27 +105,6 @@
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\DataWriter.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="$(ReleaseBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="$(ReleaseBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\Config.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
<PrecompiledHeader Condition="$(ReleaseBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="dllmain.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
@ -143,4 +122,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -21,7 +21,8 @@ BlockRandomizer::BlockRandomizer(
size_t randomizationRangeInSamples,
IDataDeserializerPtr deserializer,
DecimationMode decimationMode,
bool useLegacyRandomization)
bool useLegacyRandomization,
bool multithreadedGetNextSequence)
: m_verbosity(verbosity),
m_deserializer(deserializer),
m_decimationMode(decimationMode),
@ -31,7 +32,8 @@ BlockRandomizer::BlockRandomizer(
m_epochStartPosition(0),
m_sweepTotalNumberOfSamples(0),
m_lastSeenChunkId(SIZE_MAX),
m_chunkRandomizer(std::make_shared<ChunkRandomizer>(deserializer, randomizationRangeInSamples, useLegacyRandomization))
m_chunkRandomizer(std::make_shared<ChunkRandomizer>(deserializer, randomizationRangeInSamples, useLegacyRandomization)),
m_multithreadedGetNextSequences(multithreadedGetNextSequence)
{
assert(deserializer != nullptr);
@ -116,11 +118,7 @@ Sequences BlockRandomizer::GetNextSequences(size_t sampleCount)
result.m_data.resize(m_streams.size(), std::vector<SequenceDataPtr>(decimated.size()));
// TODO: This will be changed, when we move transformers under the randomizer.
// TODO: Randomizer won't should not deal with multithreading.
#pragma omp parallel for ordered schedule(dynamic)
for (int i = 0; i < decimated.size(); ++i)
{
auto process = [&](int i) -> void {
const auto& description = decimated[i];
std::vector<SequenceDataPtr> sequence;
auto it = m_chunks.find(description.m_chunk->m_chunkId);
@ -134,6 +132,19 @@ Sequences BlockRandomizer::GetNextSequences(size_t sampleCount)
{
result.m_data[j][i] = sequence[j];
}
};
// TODO: This will be changed, when we move transformers under the randomizer, should not deal with multithreading here.
if (m_multithreadedGetNextSequences)
{
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < decimated.size(); ++i)
process(i);
}
else
{
for (int i = 0; i < decimated.size(); ++i)
process(i);
}
m_sequenceRandomizer->ReleaseChunks();
@ -214,7 +225,7 @@ void BlockRandomizer::RetrieveDataChunks()
m_lastSeenChunkId = window.back().m_chunkId;
// in the loop we are building a new map of currently loaded chunks:
// we are iterating thru all chunks in the window and if they are not in m_chunks map -
// we are iterating thru all chunks in the window and if they are not in m_chunks map -
// they get requested from the deserializer.
// There could be some chunks in the m_chunks that are not required anymore, by swapping the chunks with m_chunks, we are removing those.
std::map<size_t, ChunkPtr> chunks;

Просмотреть файл

@ -20,10 +20,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// The code is based on the old block randomizer and it preserves the same behavior to pass all available tests.
// The high-level algorithm is:
// When next sequences are requested (limited by the sampleCount), the following steps are performed:
// 1) if a new sweep is entered, randomize chunk descriptions using ChunkRandomizer, also precalculate randomization windows for all
// 1) if a new sweep is entered, randomize chunk descriptions using ChunkRandomizer, also precalculate randomization windows for all
// chunk descriptions
// 2) if a new chunk is entered, using SequenceRandomizer identify a window of chunks and requested their sequence descriptions from deserializer.
// 3) randomize sequence descriptions inside the window
// 3) randomize sequence descriptions inside the window
// 4) return sequence descriptions not exceeding sampleCount/minibatch limit
// 5) decimate sequence descriptions based on the worker rank
// 6) request chunks of data based on decimated sequences and return sequence data
@ -47,7 +47,8 @@ public:
size_t randomizationRangeInSamples,
IDataDeserializerPtr deserializer,
DecimationMode decimationMode = DecimationMode::chunk,
bool useLegacyRandomization = false);
bool useLegacyRandomization = false,
bool multithreadedGetNextSequences = false);
virtual void Initialize(TransformerPtr, const ConfigParameters&) override {};
@ -118,6 +119,10 @@ private:
// Decimation mode.
DecimationMode m_decimationMode;
// Whether to get sequences using multiple thread.
// TODO temporary; should go away when transformers are moved closer to the deserializer
bool m_multithreadedGetNextSequences;
// General configuration
int m_verbosity;
};

Просмотреть файл

@ -10,11 +10,22 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// Sequence key, used for correlations between sequences between different deserializers.
// Sequence key, used for correlations of sequences between different deserializers.
// TODO: In many cases sequence keys share the same prefix. Splitting the sequence key on
// sequence prefix and suffix will allow us to store keys more efficiently.
// The sample identifies a particular sample inside the sequence. In the future it will be hidden, so that deserializers won't know about
// sequence or sample mode, exposing only sequences.
struct KeyType
{
size_t m_major;
size_t m_minor;
// Possible sequence common prefix.
// size_t m_prefix;
// Identifies sequence between different deserializers.
size_t m_sequence;
// Sample id.
size_t m_sample;
};
class Chunk;

Просмотреть файл

@ -16,9 +16,8 @@ public:
FramePacker(
MemoryProviderPtr memoryProvider,
TransformerPtr transformer,
size_t minibatchSize,
const std::vector<StreamDescriptionPtr>& streams) :
SequencePacker(memoryProvider, transformer, minibatchSize, streams)
SequencePacker(memoryProvider, transformer, streams)
{
}

Просмотреть файл

@ -11,13 +11,14 @@
namespace Microsoft { namespace MSR { namespace CNTK {
NoRandomizer::NoRandomizer(IDataDeserializerPtr deserializer)
NoRandomizer::NoRandomizer(IDataDeserializerPtr deserializer, bool multithreadedGetNextSequences)
: m_deserializer(deserializer),
m_samplePositionInEpoch(0),
m_currentChunkPosition(SIZE_MAX),
m_globalSamplePosition(0),
m_totalNumberOfSamples(0),
m_currentSequencePositionInChunk(0)
m_currentSequencePositionInChunk(0),
m_multithreadedGetNextSequences(multithreadedGetNextSequences)
{
assert(deserializer != nullptr);
m_streams = m_deserializer->GetStreamDescriptions();
@ -172,22 +173,60 @@ Sequences NoRandomizer::GetNextSequences(size_t sampleCount)
}
result.m_data.resize(m_streams.size(), std::vector<SequenceDataPtr>(subsetSize));
// Collect all the chunks that we need
std::map<size_t, ChunkPtr> chunks;
if (m_currentChunk != nullptr)
{
chunks[m_currentChunkId] = m_currentChunk;
}
for (int i = 0; i < subsetSize; ++i)
{
const auto& sequenceDescription = descriptions[start + i];
auto it = chunks.find(sequenceDescription.m_chunkId);
if (it == chunks.end())
{
chunks[sequenceDescription.m_chunkId] = m_deserializer->GetChunk(sequenceDescription.m_chunkId);
}
}
auto process = [&](int i) -> void {
std::vector<SequenceDataPtr> sequence;
const auto& sequenceDescription = descriptions[start + i];
if (sequenceDescription.m_chunkId != m_currentChunkId)
auto it = chunks.find(sequenceDescription.m_chunkId);
if (it == chunks.end())
{
m_currentChunk = m_deserializer->GetChunk(sequenceDescription.m_chunkId);
m_currentChunkId = sequenceDescription.m_chunkId;
LogicError("Invalid chunk requested.");
}
m_currentChunk->GetSequence(sequenceDescription.m_id, sequence);
it->second->GetSequence(sequenceDescription.m_id, sequence);
for (int j = 0; j < m_streams.size(); ++j)
{
result.m_data[j][i] = sequence[j];
}
};
// TODO: This will be changed, when we move transformers under the (no-) randomizer, should not deal with multithreading here.
if (m_multithreadedGetNextSequences)
{
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < subsetSize; ++i)
process(i);
}
else
{
for (int i = 0; i < subsetSize; ++i)
process(i);
}
// Keep the last chunk for next time
m_currentChunkId = descriptions[start + subsetSize - 1].m_chunkId;
auto it = chunks.find(m_currentChunkId);
assert(it != chunks.end());
m_currentChunk = it->second;
return result;
}

Просмотреть файл

@ -21,7 +21,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
class NoRandomizer : public Transformer
{
public:
NoRandomizer(IDataDeserializerPtr deserializer);
NoRandomizer(IDataDeserializerPtr deserializer, bool multithreadedGetNextSequences = false);
virtual void Initialize(TransformerPtr next, const ConfigParameters& readerConfig) override;
virtual void StartEpoch(const EpochConfiguration& config) override;
@ -43,6 +43,10 @@ private:
IDataDeserializerPtr m_deserializer;
// Whether to get sequences using multiple thread.
// TODO temporary; should go away when transformers are moved closer to the deserializer
bool m_multithreadedGetNextSequences;
// Stream descriptions
std::vector<StreamDescriptionPtr> m_streams;

Просмотреть файл

@ -15,6 +15,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
class Packer
{
public:
// Sets current epoch configuration.
virtual void StartEpoch(const EpochConfiguration& config) = 0;
virtual Minibatch ReadMinibatch() = 0;
virtual ~Packer() {}
};

Просмотреть файл

@ -24,23 +24,26 @@ void PackerBase::StreamBuffer::Resize(size_t newSize)
});
}
void PackerBase::StartEpoch(const EpochConfiguration& config)
{
m_minibatchSize = config.m_minibatchSizeInSamples;
if (m_minibatchSize == 0)
{
LogicError("Minibatch size cannot be zero.");
}
}
PackerBase::PackerBase(MemoryProviderPtr memoryProvider,
TransformerPtr transformer,
size_t minibatchSize,
const std::vector<StreamDescriptionPtr>& streams) :
m_transformer(transformer),
m_minibatchSize(minibatchSize),
m_minibatchSize(0),
m_outputStreamDescriptions(streams)
{
m_inputStreamDescriptions = m_transformer->GetStreamDescriptions();
assert(m_inputStreamDescriptions.size() != 0);
assert(m_inputStreamDescriptions.size() == m_outputStreamDescriptions.size());
if (m_minibatchSize == 0)
{
LogicError("Minibatch size cannot be zero.");
}
m_streamBuffers.reserve(m_outputStreamDescriptions.size());
// Sanity checks:

Просмотреть файл

@ -35,7 +35,6 @@ protected:
PackerBase(MemoryProviderPtr memoryProvider,
TransformerPtr transformer,
size_t minibatchSize,
const std::vector<StreamDescriptionPtr>& streams);
typedef std::vector<SequenceDataPtr> StreamBatch;
@ -71,6 +70,10 @@ protected:
// Minibatch size in samples.
size_t m_minibatchSize;
public:
// Sets current epoch configuration.
virtual void StartEpoch(const EpochConfiguration& config) override;
};
inline void PackerBase::PackSparseSampleAsDense(char* destination, SparseSequenceDataPtr sequence,

Просмотреть файл

@ -29,6 +29,7 @@ struct EpochConfiguration
size_t m_minibatchSizeInSamples; // Maximum minibatch size for the epoch in samples
size_t m_totalEpochSizeInSamples; // Total size of the epoch in samples
size_t m_epochIndex; // Current epoch index [0 .. max number of epochs)
size_t m_truncationSize; // Truncation size in samples for truncated BPTT mode.
};
// Supported primitive element types, will be extended in the future.

Просмотреть файл

@ -22,20 +22,11 @@
<ProjectGuid>{F0A9637C-20DA-42F0-83D4-23B4704DE602}</ProjectGuid>
<RootNamespace>ReaderLib</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<PropertyGroup Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="$(DebugBuild)" Label="Configuration">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
@ -45,39 +36,10 @@
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<SDLCheck>true</SDLCheck>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup>
<ClCompile>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<Optimization>Disabled</Optimization>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="BpttPacker.h" />
<ClInclude Include="Bundler.h" />
<ClInclude Include="ChunkRandomizer.h" />
<ClInclude Include="DataDeserializerBase.h" />
@ -98,9 +60,9 @@
<ClInclude Include="Reader.h" />
<ClInclude Include="ReaderShim.h" />
<ClInclude Include="Transformer.h" />
<ClInclude Include="TruncatedBpttPacker.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="BpttPacker.cpp" />
<ClCompile Include="Bundler.cpp" />
<ClCompile Include="ChunkRandomizer.cpp" />
<ClCompile Include="NoRandomizer.cpp" />
@ -110,8 +72,9 @@
<ClCompile Include="ReaderShim.cpp" />
<ClCompile Include="SequencePacker.cpp" />
<ClCompile Include="SequenceRandomizer.cpp" />
<ClCompile Include="TruncatedBpttPacker.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -61,7 +61,7 @@
<ClInclude Include="FramePacker.h">
<Filter>Packers</Filter>
</ClInclude>
<ClInclude Include="BpttPacker.h">
<ClInclude Include="TruncatedBpttPacker.h">
<Filter>Packers</Filter>
</ClInclude>
</ItemGroup>
@ -93,7 +93,7 @@
<ClCompile Include="FramePacker.cpp">
<Filter>Packers</Filter>
</ClCompile>
<ClCompile Include="BpttPacker.cpp">
<ClCompile Include="TruncatedBpttPacker.cpp">
<Filter>Packers</Filter>
</ClCompile>
</ItemGroup>

Просмотреть файл

@ -61,6 +61,12 @@ void ReaderShim<ElemType>::StartDistributedMinibatchLoop(
size_t numSubsets,
size_t requestedEpochSamples /*= requestDataSize*/)
{
// For adaptive minibatch, make sure there are no outstanding reads.
if (m_prefetchTask.valid())
{
m_prefetchTask.wait();
}
EpochConfiguration config;
config.m_workerRank = subsetNum;
config.m_numberOfWorkers = numSubsets;
@ -71,12 +77,6 @@ void ReaderShim<ElemType>::StartDistributedMinibatchLoop(
m_reader->StartEpoch(config);
m_endOfEpoch = false;
// For adaptive minibatch, make sure there are no outstanding reads.
if (m_prefetchTask.valid())
{
m_prefetchTask.wait();
}
m_prefetchTask = std::async(m_launchType, [this]()
{
return m_reader->ReadMinibatch();

Просмотреть файл

@ -17,9 +17,8 @@ public:
SequencePacker(
MemoryProviderPtr memoryProvider,
TransformerPtr transformer,
size_t minibatchSize,
const std::vector<StreamDescriptionPtr>& streams) :
PackerBase(memoryProvider, transformer, minibatchSize, streams)
PackerBase(memoryProvider, transformer, streams)
{
}

Просмотреть файл

@ -76,29 +76,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::vector<RandomizedSequenceDescription> result;
result.reserve(sampleCount);
size_t sequenceOffsetInsideChunk = m_currentSequenceCursor - m_randomizedChunks[m_currentChunkCursor].m_sequencePositionStart;
RandomizedSequenceDescription* sequence = &m_sequenceWindow[m_currentChunkCursor - m_chunkWindowBegin][sequenceOffsetInsideChunk];
result.push_back(*sequence);
samples -= (int)sequence->m_numberOfSamples;
m_currentSequenceCursor++;
m_currentSampleCursor += (int)sequence->m_numberOfSamples;
if (sequenceOffsetInsideChunk + 1 >= m_randomizedChunks[m_currentChunkCursor].m_original->m_numberOfSequences)
{
// Moving to the next chunk.
MoveChunkCursor();
}
bool firstSequence = true;
while (samples > 0 && m_currentChunkCursor < m_randomizedChunks.size())
{
sequenceOffsetInsideChunk = m_currentSequenceCursor - m_randomizedChunks[m_currentChunkCursor].m_sequencePositionStart;
sequence = &m_sequenceWindow[m_currentChunkCursor - m_chunkWindowBegin][sequenceOffsetInsideChunk];
if (samples - sequence->m_numberOfSamples >= 0)
size_t sequenceOffsetInsideChunk = m_currentSequenceCursor - m_randomizedChunks[m_currentChunkCursor].m_sequencePositionStart;
RandomizedSequenceDescription* sequence = &m_sequenceWindow[m_currentChunkCursor - m_chunkWindowBegin][sequenceOffsetInsideChunk];
if (firstSequence || samples >= (int)sequence->m_numberOfSamples)
{
firstSequence = false;
result.push_back(*sequence);
m_currentSequenceCursor++;
samples -= (int)sequence->m_numberOfSamples;
m_currentSampleCursor += (int)sequence->m_numberOfSamples;
if (sequenceOffsetInsideChunk + 1 >= m_randomizedChunks[m_currentChunkCursor].m_original->m_numberOfSequences)
@ -107,6 +95,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
MoveChunkCursor();
}
}
// Always decrease the available number of samples.
samples -= (int)sequence->m_numberOfSamples;
}
return result;

Просмотреть файл

@ -56,7 +56,7 @@ public:
size_t streamId = appliedStreamIds[j];
auto& allSamples = samples.m_data[streamId];
#pragma omp parallel for ordered schedule(dynamic)
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < allSamples.size(); ++i)
{
allSamples[i] = Apply(allSamples[i], *m_inputStreams[streamId], *outputStreams[streamId]);

Просмотреть файл

@ -7,7 +7,7 @@
#define _SCL_SECURE_NO_WARNINGS
#include <deque>
#include "BpttPacker.h"
#include "TruncatedBpttPacker.h"
#include "ElementTypeUtils.h"
namespace Microsoft { namespace MSR { namespace CNTK {
@ -105,35 +105,60 @@ struct SequenceBuffer
vector<Slot> m_slots;
};
BpttPacker::BpttPacker(
TruncatedBPTTPacker::TruncatedBPTTPacker(
MemoryProviderPtr memoryProvider,
TransformerPtr transformer,
size_t minibatchSize,
size_t truncationSize,
const vector<StreamDescriptionPtr>& streams)
: PackerBase(memoryProvider, transformer, minibatchSize, streams),
m_truncationSize(truncationSize)
: PackerBase(memoryProvider, transformer, streams),
m_truncationSize(0)
{
auto sparseOutput = find_if(m_outputStreamDescriptions.begin(), m_outputStreamDescriptions.end(), [](const StreamDescriptionPtr& s){ return s->m_storageType == StorageType::sparse_csc; });
if (sparseOutput != m_outputStreamDescriptions.end())
{
// TODO: add support for sparse.
RuntimeError("Sparse output is not supported in BPTT mode.");
}
// Estimating the number of parallel sequences to pack (slots) from the minibatch size and truncation size.
m_numParallelSequences = max(1, (int)floor(m_minibatchSize / m_truncationSize));
// Preparing the buffers.
// Preparing layouts.
for (int i = 0; i < m_outputStreamDescriptions.size(); ++i)
{
const auto& stream = m_outputStreamDescriptions[i];
auto& buffer = m_streamBuffers[i];
buffer.Resize(m_numParallelSequences * m_truncationSize * GetSampleSize(stream));
m_sequenceBufferPerStream.push_back(make_shared<SequenceBuffer>(m_numParallelSequences));
auto pMBLayout = make_shared<MBLayout>();
pMBLayout->SetUniqueAxisName(L"BpttPacker");
pMBLayout->SetUniqueAxisName(L"TruncatedBPTTPacker");
m_currentLayouts.push_back(pMBLayout);
}
}
void TruncatedBPTTPacker::StartEpoch(const EpochConfiguration& config)
{
if (m_minibatchSize != config.m_minibatchSizeInSamples ||
m_truncationSize != config.m_truncationSize)
{
m_minibatchSize = config.m_minibatchSizeInSamples;
m_truncationSize = config.m_truncationSize;
if (m_minibatchSize == 0)
{
LogicError("Minibatch size cannot be zero.");
}
if (m_truncationSize == 0)
{
LogicError("Truncation size cannot be zero.");
}
// Estimating the number of parallel sequences to pack (slots) from the minibatch size and truncation size.
m_numParallelSequences = max(1, (int)floor(m_minibatchSize / m_truncationSize));
m_sequenceBufferPerStream.clear();
// Preparing the buffers.
for (int i = 0; i < m_outputStreamDescriptions.size(); ++i)
{
const auto& stream = m_outputStreamDescriptions[i];
auto& buffer = m_streamBuffers[i];
buffer.Resize(m_numParallelSequences * m_truncationSize * GetSampleSize(stream));
m_sequenceBufferPerStream.push_back(make_shared<SequenceBuffer>(m_numParallelSequences));
}
}
// Filling in the initial set of sequences
for (size_t slotIndex = 0; slotIndex < m_numParallelSequences; ++slotIndex)
@ -142,7 +167,7 @@ BpttPacker::BpttPacker(
}
}
Minibatch BpttPacker::ReadMinibatch()
Minibatch TruncatedBPTTPacker::ReadMinibatch()
{
Minibatch result;
@ -174,7 +199,7 @@ Minibatch BpttPacker::ReadMinibatch()
}
// Packs a slot of sequences into the minibatch.
void BpttPacker::PackSlot(size_t streamIndex, size_t slotIndex, size_t& sequenceId)
void TruncatedBPTTPacker::PackSlot(size_t streamIndex, size_t slotIndex, size_t& sequenceId)
{
auto& slot = m_sequenceBufferPerStream[streamIndex]->m_slots[slotIndex];
@ -274,7 +299,7 @@ void BpttPacker::PackSlot(size_t streamIndex, size_t slotIndex, size_t& sequence
}
}
void BpttPacker::ReadSequencesToSlot(size_t slotIndex)
void TruncatedBPTTPacker::ReadSequencesToSlot(size_t slotIndex)
{
const auto& slot = m_sequenceBufferPerStream.front()->m_slots[slotIndex];
while (m_truncationSize > slot.AvailableNumberOfSamples())

Просмотреть файл

@ -18,18 +18,18 @@ typedef std::shared_ptr<SequenceBuffer> SequenceBufferPtr;
// A bptt packer that densely packs samples in parallel for GPU consumptions.
// TODO: Currently supports only packing of streams with sequences of equal length.
class BpttPacker : public PackerBase
class TruncatedBPTTPacker : public PackerBase
{
public:
BpttPacker(
TruncatedBPTTPacker(
MemoryProviderPtr memoryProvider,
TransformerPtr transformer,
size_t minibatchSize,
size_t truncationSize,
const std::vector<StreamDescriptionPtr>& streams);
virtual Minibatch ReadMinibatch() override;
virtual void StartEpoch(const EpochConfiguration& config) override;
private:
// Reads sequences to slot with the specified index.
// Number of slots = m_parallelNumberOfSequences
@ -65,6 +65,6 @@ private:
std::vector<MBLayoutPtr> m_currentLayouts;
};
typedef std::shared_ptr<BpttPacker> BpttPackerPtr;
typedef std::shared_ptr<TruncatedBPTTPacker> TruncatedBPTTPackerPtr;
}}}

Просмотреть файл

@ -72,7 +72,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
@ -93,7 +93,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
@ -114,20 +114,6 @@
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\DataWriter.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\Config.cpp">
<PrecompiledHeader Condition="$(ReleaseBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="dllmain.cpp" />
<ClCompile Include="SparsePCReader.cpp">
<PrecompiledHeader Condition="$(ReleaseBuild)">Use</PrecompiledHeader>
@ -140,4 +126,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -71,7 +71,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
@ -92,7 +92,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Math.lib;Common.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
@ -108,18 +108,6 @@
<ClInclude Include="UCIParser.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\DataWriter.cpp" />
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\Config.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="Exports.cpp" />
<ClCompile Include="dllmain.cpp">
<CompileAsManaged Condition="$(DebugBuild)">false</CompileAsManaged>
@ -138,4 +126,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -8,6 +8,7 @@
#include "Basics.h"
#include "Matrix.h"
#include "TensorView.h"
#include <memory> // for pair
#include <limits> // for isnan() and numeric_limits --TODO: is that the right header?
@ -40,9 +41,9 @@ struct CriterionAccumulator
{
// constructor
CriterionAccumulator(size_t numCriteria, DEVICEID_TYPE deviceId) :
m_aggregateCriterionValues(1, numCriteria, deviceId)
m_aggregateCriterionValues(make_shared<Matrix<ElemType>> (1, numCriteria, deviceId))
{
m_aggregateCriterionValues.SetValue(0);
m_aggregateCriterionValues->SetValue(0);
m_aggregateSampleCounts.assign(numCriteria, 0);
}
// 'i' is the index of the element we add into (multiple eval criteria share the same matrix object)
@ -63,7 +64,7 @@ struct CriterionAccumulator
if (m_aggregateSampleCounts[i] == 0)
return EpochCriterion(0, 0); // avoid unnecessary GPU access
else
return EpochCriterion(m_aggregateCriterionValues(0, i), m_aggregateSampleCounts[i]);
return EpochCriterion(m_aggregateCriterionValues->GetValue(0, i), m_aggregateSampleCounts[i]);
}
private:
@ -73,23 +74,41 @@ private:
const CriterionAccumulator& Accumulate(const std::vector<ComputationNodeBasePtr>& nodes, size_t i, size_t legacyNumSamples)
{
const auto& node = nodes[i]; // multiple nodes are managed by this struct
float beta = reset ? 0 : 1;
// Note: A future change will be that criterion nodes emit criteria per frame.
// In that case, we will do masking and an implicit reduction right here using TensorView.
size_t beta = reset ? 0 : 1;
size_t numSamples = GetNumSamples(nodes[i], legacyNumSamples);
#if 1
// For criterion nodes that emit criteria per frame, we will at this point
// do masking and an implicit reduction.
// get a TensorView of the criterion values to aggregate
FrameRange fr(node->GetMBLayout());
node->MaskMissingValueColumnsToZero(fr); // set gaps to zero, so that we can aggregate
auto criterionValue = node->As<ComputationNode<ElemType>>()->ValueTensorFor(SIZE_MAX, fr);
// get a TensorView of our aggregator
TensorShape shape{ m_aggregateCriterionValues->GetNumRows(), m_aggregateCriterionValues->GetNumCols() };
shape.NarrowTo(1, i, i + 1); // narrow to the single element that corresponds to the accumulator value
auto criterionAccumulator = TensorView<ElemType>(m_aggregateCriterionValues, shape);
// accumulate
// Note: If criterion is > [1 x 1] then inverse broadcasting will kick in and aggregate.
criterionAccumulator.DoCopyOf((float) beta, criterionValue, 1);
m_aggregateSampleCounts[i] = m_aggregateSampleCounts[i] * beta + numSamples;
#else
// temp solution until we add TensorView reduction
if (beta == 0)
{
Matrix<ElemType>::AssignElementToElement(dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Value(),
0, 0, m_aggregateCriterionValues, 0, i);
0, 0, *m_aggregateCriterionValues, 0, i);
m_aggregateSampleCounts[i] = numSamples;
}
else if (numSamples > 0) // avoid unnecessary GPU access
{
Matrix<ElemType>::AddElementToElement(dynamic_pointer_cast<ComputationNode<ElemType>>(node)->Value(),
0, 0, m_aggregateCriterionValues, 0, i);
0, 0, *m_aggregateCriterionValues, 0, i);
m_aggregateSampleCounts[i] += numSamples;
}
#endif
return *this;
}
// get the number of samples
@ -102,8 +121,8 @@ private:
}
private:
Matrix<ElemType> m_aggregateCriterionValues; // [1 x N]
vector<size_t> m_aggregateSampleCounts; // [N]
shared_ptr<Matrix<ElemType>> m_aggregateCriterionValues; // [1 x N]
vector<size_t> m_aggregateSampleCounts; // [N]
};
}}}

Просмотреть файл

@ -24,107 +24,49 @@
<RootNamespace>CNTK</RootNamespace>
<ProjectName>SGDLib</ProjectName>
</PropertyGroup>
<PropertyGroup Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
</PropertyGroup>
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="$(DebugBuild)" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<PreBuildEventUseInBuild>false</PreBuildEventUseInBuild>
</PropertyGroup>
<PropertyGroup>
<LinkIncremental>$(DebugBuild)</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(CNTK_ENABLE_1BitSGD)'=='true'">$(SolutionDir)Source\1BitSGD;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(CNTK_ENABLE_ASGD)'=='true'">$(SolutionDir)Source\multiverso\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PrecompiledHeader>
</PrecompiledHeader>
<PreprocessorDefinitions>WIN32;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(CNTK_ENABLE_1BitSGD)'=='true'">QUANTIZED_GRADIENT_AGGREGATION;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(CNTK_ENABLE_ASGD)'=='true'">MULTIVERSO_SUPPORT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(MSMPI_LIB64);$(OutDir);$(NvmlLibPath)</AdditionalLibraryDirectories>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; %(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>Math.dll</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<!-- TODO can we merge with above? -->
<ItemDefinitionGroup Condition="'$(CNTK_ENABLE_1BitSGD)'=='true'">
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\1BitSGD;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(CNTK_ENABLE_ASGD)'=='true'">
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\multiverso\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_SCL_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(CNTK_ENABLE_ASGD)'=='true'">MULTIVERSO_SUPPORT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(CNTK_ENABLE_1BitSGD)'=='true'">QUANTIZED_GRADIENT_AGGREGATION;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<OpenMPSupport>true</OpenMPSupport>
<TreatWarningAsError>true</TreatWarningAsError>
<PreprocessorDefinitions>_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; kernel32.lib; user32.lib; shell32.lib; %(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>Math.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
<StackReserveSize>100000000</StackReserveSize>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(CNTK_ENABLE_1BitSGD)'=='true'">QUANTIZED_GRADIENT_AGGREGATION;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(CNTK_ENABLE_ASGD)'=='true'">MULTIVERSO_SUPPORT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
<TreatWarningAsError>true</TreatWarningAsError>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; kernel32.lib; user32.lib; shell32.lib; %(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<DelayLoadDLLs>Math.dll; nvml.dll; cudart64_70.dll</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(CpuOnlyBuild)">
<ClCompile>
<PreprocessorDefinitions>CPUONLY;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<DelayLoadDLLs>Math.dll</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(GpuBuild)">
<ClCompile>
@ -132,9 +74,10 @@
</ClCompile>
<Link>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaLibPath)</AdditionalLibraryDirectories>
<DelayLoadDLLs>%(DelayLoadDLLs);nvml.dll;$(CudaRuntimeDll)</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Command>if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Command>if exist "%ProgramW6432%\NVIDIA Corporation\NVSMI" xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "$(TargetDir)"</Command>
<Message>Copying NVidia GDK extension DLL to target folder</Message>
</PostBuildEvent>
</ItemDefinitionGroup>
@ -181,22 +124,10 @@
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Common\Config.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\DataReader.cpp" />
<ClCompile Include="..\Common\DataWriter.cpp" />
<ClCompile Include="..\Common\File.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\fileutil.cpp">
<PrecompiledHeader Condition="$(DebugBuild)">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\TimerUtility.cpp" />
<ClCompile Include="Profiler.cpp" />
<ClCompile Include="SGD.cpp" />
<ClCompile Include="stdafx.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
</Project>

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше