Conflicts:
	MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h
	MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
	MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h
	MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h
	Tests/Speech/LSTM/cntk.config
	configure
This commit is contained in:
Amit Agarwal 2015-10-15 21:08:10 -07:00
Родитель c9f8e73dc4 96cb4b9872
Коммит 35b0e71b5a
53 изменённых файлов: 2031 добавлений и 468 удалений

1
.gitattributes поставляемый
Просмотреть файл

@ -1 +1,2 @@
run-test text eol=lf
run-test-common text eol=lf

100
CNTK.sln
Просмотреть файл

@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2013
VisualStudioVersion = 12.0.21005.1
VisualStudioVersion = 12.0.40629.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKMathDll", "Math\Math\Math.vcxproj", "{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}"
ProjectSection(ProjectDependencies) = postProject
@ -167,6 +167,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CheckInSuites", "CheckInSui
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SparsePCReader", "DataReader\SparsePCReader\SparsePCReader.vcxproj", "{CE429AA2-3778-4619-8FD1-49BA3B81197B}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Speech", "Speech", "{C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}"
ProjectSection(SolutionItems) = preProject
@ -201,6 +204,11 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "LSTM", "LSTM", "{19EE975B-2
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ParseConfig", "MachineLearning\ParseConfig\ParseConfig.vcxproj", "{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ImageReader", "DataReader\ImageReader\ImageReader.vcxproj", "{9BD0A746-0BBD-45B6-B81C-053F03C26CFB}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKComputationNetworkLib", "MachineLearning\CNTKComputationNetworkLib\CNTKComputationNetworkLib.vcxproj", "{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
@ -349,6 +357,50 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FullUtterance", "FullUttera
Tests\Speech\LSTM\FullUtterance\testcases.yml = Tests\Speech\LSTM\FullUtterance\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DNN", "DNN", "{6994C86D-A672-4254-824A-51F4DFEB807F}"
ProjectSection(SolutionItems) = preProject
Tests\Speech\DNN\cntk.config = Tests\Speech\DNN\cntk.config
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Parallel1BitQuantization", "Parallel1BitQuantization", "{FD88A77E-A839-4513-AC5C-AD92447BE229}"
ProjectSection(SolutionItems) = preProject
Tests\Speech\DNN\Parallel1BitQuantization\baseline.cpu.txt = Tests\Speech\DNN\Parallel1BitQuantization\baseline.cpu.txt
Tests\Speech\DNN\Parallel1BitQuantization\baseline.gpu.txt = Tests\Speech\DNN\Parallel1BitQuantization\baseline.gpu.txt
Tests\Speech\DNN\Parallel1BitQuantization\baseline.windows.cpu.txt = Tests\Speech\DNN\Parallel1BitQuantization\baseline.windows.cpu.txt
Tests\Speech\DNN\Parallel1BitQuantization\baseline.windows.gpu.txt = Tests\Speech\DNN\Parallel1BitQuantization\baseline.windows.gpu.txt
Tests\Speech\DNN\Parallel1BitQuantization\run-test = Tests\Speech\DNN\Parallel1BitQuantization\run-test
Tests\Speech\DNN\Parallel1BitQuantization\testcases.yml = Tests\Speech\DNN\Parallel1BitQuantization\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ParallelNoQuantization", "ParallelNoQuantization", "{40F93928-5CA2-433A-A48C-C8E9A35D7079}"
ProjectSection(SolutionItems) = preProject
Tests\Speech\DNN\ParallelNoQuantization\baseline.cpu.txt = Tests\Speech\DNN\ParallelNoQuantization\baseline.cpu.txt
Tests\Speech\DNN\ParallelNoQuantization\baseline.gpu.txt = Tests\Speech\DNN\ParallelNoQuantization\baseline.gpu.txt
Tests\Speech\DNN\ParallelNoQuantization\baseline.windows.cpu.txt = Tests\Speech\DNN\ParallelNoQuantization\baseline.windows.cpu.txt
Tests\Speech\DNN\ParallelNoQuantization\baseline.windows.gpu.txt = Tests\Speech\DNN\ParallelNoQuantization\baseline.windows.gpu.txt
Tests\Speech\DNN\ParallelNoQuantization\run-test = Tests\Speech\DNN\ParallelNoQuantization\run-test
Tests\Speech\DNN\ParallelNoQuantization\testcases.yml = Tests\Speech\DNN\ParallelNoQuantization\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DiscriminativePreTraining", "DiscriminativePreTraining", "{39B9BB97-D0E8-439A-8A1B-8DB8E7CF73C3}"
ProjectSection(SolutionItems) = preProject
Tests\Speech\DNN\DiscriminativePreTraining\baseline.cpu.txt = Tests\Speech\DNN\DiscriminativePreTraining\baseline.cpu.txt
Tests\Speech\DNN\DiscriminativePreTraining\baseline.gpu.txt = Tests\Speech\DNN\DiscriminativePreTraining\baseline.gpu.txt
Tests\Speech\DNN\DiscriminativePreTraining\baseline.windows.cpu.txt = Tests\Speech\DNN\DiscriminativePreTraining\baseline.windows.cpu.txt
Tests\Speech\DNN\DiscriminativePreTraining\baseline.windows.gpu.txt = Tests\Speech\DNN\DiscriminativePreTraining\baseline.windows.gpu.txt
Tests\Speech\DNN\DiscriminativePreTraining\cntk_dpt.config = Tests\Speech\DNN\DiscriminativePreTraining\cntk_dpt.config
Tests\Speech\DNN\DiscriminativePreTraining\run-test = Tests\Speech\DNN\DiscriminativePreTraining\run-test
Tests\Speech\DNN\DiscriminativePreTraining\testcases.yml = Tests\Speech\DNN\DiscriminativePreTraining\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ndl", "ndl", "{09B8623A-BF5D-4499-B3B3-A9EFEA4C4B68}"
ProjectSection(SolutionItems) = preProject
Tests\Speech\Data\ndl\add_layer.mel = Tests\Speech\Data\ndl\add_layer.mel
Tests\Speech\Data\ndl\dnn.txt = Tests\Speech\Data\ndl\dnn.txt
Tests\Speech\Data\ndl\dnn_1layer.txt = Tests\Speech\Data\ndl\dnn_1layer.txt
Tests\Speech\Data\ndl\macros.txt = Tests\Speech\Data\ndl\macros.txt
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
@ -432,51 +484,61 @@ Global
{EAD17188-072C-4726-B840-A769C36DAD1B}.Debug|x64.Build.0 = Debug|x64
{EAD17188-072C-4726-B840-A769C36DAD1B}.Release|x64.ActiveCfg = Release|x64
{EAD17188-072C-4726-B840-A769C36DAD1B}.Release|x64.Build.0 = Release|x64
{9BD0A746-0BBD-45B6-B81C-053F03C26CFB}.Debug|x64.ActiveCfg = Debug|x64
{9BD0A746-0BBD-45B6-B81C-053F03C26CFB}.Debug|x64.Build.0 = Debug|x64
{9BD0A746-0BBD-45B6-B81C-053F03C26CFB}.Release|x64.ActiveCfg = Release|x64
{9BD0A746-0BBD-45B6-B81C-053F03C26CFB}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{B3DD765E-694E-4494-BAD7-37BBF2942517} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{6CEE834A-8104-46A8-8902-64C81BD7928F} = {D45DF403-6781-444E-B654-A96868C5BE68}
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976} = {D45DF403-6781-444E-B654-A96868C5BE68}
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68}
{DBB3C106-B0B4-4059-8477-C89528CEC1B0} = {D45DF403-6781-444E-B654-A96868C5BE68}
{C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} = {D45DF403-6781-444E-B654-A96868C5BE68}
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B} = {D45DF403-6781-444E-B654-A96868C5BE68}
{5E666C53-2D82-49C9-9127-3FDDC321C741} = {D45DF403-6781-444E-B654-A96868C5BE68}
{E6646FFE-3588-4276-8A15-8D65C22711C1} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{1D5787D4-52E4-45DB-951B-82F220EE0C6A} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68}
{B3DD765E-694E-4494-BAD7-37BBF2942517} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{9A2F2441-5972-4EA8-9215-4119FCE0FB68} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{014DA766-B37B-4581-BC26-963EA5507931} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{D667AF32-028A-4A5D-BE19-F46776F0F6B2} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{CE429AA2-3778-4619-8FD1-49BA3B81197B} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{065AF55D-AF02-448B-BFCD-52619FDA4BD0} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{3ED0465D-23E7-4855-9694-F788717B6533} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{3E9C89B1-C045-4F42-92B2-F9FFFFC2DBD4} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{C70E1572-20FF-496C-A0A9-10AA6755A07C} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{065AF55D-AF02-448B-BFCD-52619FDA4BD0} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{98D2C32B-0C1F-4E19-A626-65F7BA4600CF} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
{EA67F51F-1FE8-462D-9F3E-01161685AD59} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
{DE1A06BA-EC5C-4E0D-BCA8-3EA555310C58} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
{63024704-A2D7-497E-AD4B-5C10C6AA1374} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
{F9BEB27E-8AF5-464E-8D45-0000D5AFA2D3} = {EA67F51F-1FE8-462D-9F3E-01161685AD59}
{889C1CCF-92B3-450B-B00D-FC9A9D5BE464} = {EA67F51F-1FE8-462D-9F3E-01161685AD59}
{DBB3C106-B0B4-4059-8477-C89528CEC1B0} = {D45DF403-6781-444E-B654-A96868C5BE68}
{CE429AA2-3778-4619-8FD1-49BA3B81197B} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} = {D45DF403-6781-444E-B654-A96868C5BE68}
{4BBF2950-3DBD-469A-AD57-6CACBEBAF541} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
{5F733BBA-FE83-4668-8F83-8B0E78A36619} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
{19EE975B-232D-49F0-94C7-6F1C6424FB53} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
{88F85A64-105D-4CDA-8199-B7A312FC8A27} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
{8241108A-7824-4FF2-BECA-7521A9D89DCF} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B} = {D45DF403-6781-444E-B654-A96868C5BE68}
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{5E666C53-2D82-49C9-9127-3FDDC321C741} = {D45DF403-6781-444E-B654-A96868C5BE68}
{6D1353D6-F196-466F-B886-F16D48759B20} = {5E666C53-2D82-49C9-9127-3FDDC321C741}
{B6725C9F-A6D2-4269-9B74-7888A90F7884} = {5E666C53-2D82-49C9-9127-3FDDC321C741}
{B27DD434-EECD-4EE0-A03B-1150EB87258E} = {B6725C9F-A6D2-4269-9B74-7888A90F7884}
{A4884465-CFBB-4A64-A9DE-690E1A63EF7E} = {B6725C9F-A6D2-4269-9B74-7888A90F7884}
{9BD0A746-0BBD-45B6-B81C-053F03C26CFB} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{3E9C89B1-C045-4F42-92B2-F9FFFFC2DBD4} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{C70E1572-20FF-496C-A0A9-10AA6755A07C} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{88F85A64-105D-4CDA-8199-B7A312FC8A27} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
{8241108A-7824-4FF2-BECA-7521A9D89DCF} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
{6994C86D-A672-4254-824A-51F4DFEB807F} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
{FD88A77E-A839-4513-AC5C-AD92447BE229} = {6994C86D-A672-4254-824A-51F4DFEB807F}
{40F93928-5CA2-433A-A48C-C8E9A35D7079} = {6994C86D-A672-4254-824A-51F4DFEB807F}
{39B9BB97-D0E8-439A-8A1B-8DB8E7CF73C3} = {6994C86D-A672-4254-824A-51F4DFEB807F}
{09B8623A-BF5D-4499-B3B3-A9EFEA4C4B68} = {5F733BBA-FE83-4668-8F83-8B0E78A36619}
EndGlobalSection
EndGlobal

Просмотреть файл

@ -159,9 +159,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// HAH! This function is only ever used for Decimate(). It can completely go away, as can methods of the same name in the readers!
//bool RequireSentenceSeg() const { return m_dataIsSequential; } // this is the name of a function on DataReader which really belongs here
#if 0 // (I thought I need this, but don't. Keeping it anyway, maybe we need it again in the future.)
// compute the number of actual samples in this layout (not counting NoLabel ones)
// This is only expensive for a weirdo configuration of multiple variable-length sequences that still normalizes the gradient over the total # seen samples.
// This is used by MeanNode and InvStdDevNode.
size_t DetermineActualNumSamples() const
{
size_t n = GetNumTimeSteps() * GetNumParallelSequences();
@ -178,7 +177,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
return n;
}
#endif
private:
size_t m_numTimeSteps;

Просмотреть файл

@ -84,10 +84,12 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec
#include <locale> // std::wstring_convert
#include <string>
#include <algorithm> // for transform()
#include <mutex>
#include <unordered_map>
#include <chrono>
#include <thread>
#include <stack>
#include <mutex>
#include <memory>
#ifdef _MSC_VER
#include <codecvt> // std::codecvt_utf8
#endif
@ -1004,4 +1006,59 @@ static inline std::wstring FormatWin32Error(DWORD error)
return res;
}
#endif // _WIN32
// Very simple version of a thread-safe stack. Add other functions as needed.
// Used to hand out per-caller resources (e.g. RNG instances) from a shared pool.
template<typename T>
class conc_stack
{
public:
    typedef typename std::stack<T>::value_type value_type;

    conc_stack() {}

    // Pops and returns the top item; when the stack is empty, returns a fresh
    // item obtained from 'factory'. The factory is invoked OUTSIDE the lock so
    // that a slow construction does not block concurrent push/pop callers.
    value_type pop_or_create(std::function<value_type()> factory)
    {
        {
            std::lock_guard<std::mutex> g(m_locker);
            if (!m_stack.empty())
            {
                value_type res = std::move(m_stack.top());
                m_stack.pop();
                return res;
            }
        }
        // Stack was empty at the time of the check; create a new item without
        // holding the mutex. (Another thread pushing meanwhile is harmless: the
        // pool simply grows by one.)
        return factory();
    }

    // Pushes a copy of 'item'.
    void push(const value_type& item)
    {
        std::lock_guard<std::mutex> g(m_locker);
        m_stack.push(item);
    }

    // Pushes 'item' by move.
    void push(value_type&& item)
    {
        std::lock_guard<std::mutex> g(m_locker);
        m_stack.push(std::forward<value_type>(item));
    }

public:
    // Non-copyable, non-movable: the contained mutex pins the object in place.
    conc_stack(const conc_stack&) = delete;
    conc_stack& operator=(const conc_stack&) = delete;
    conc_stack(conc_stack&&) = delete;
    conc_stack& operator=(conc_stack&&) = delete;

private:
    std::stack<value_type> m_stack;  // guarded by m_locker
    std::mutex m_locker;
};
// make_unique was added in GCC 4.9.0 (with C++14 support). Provide a fallback
// for older GCC 4.x only.
// NOTE: the guard must test '__GNUC__ == 4', not '>= 4'. With '>= 4' any newer
// major release whose minor version happens to be below 9 (e.g. GCC 5.4, 6.3)
// would also take this branch and clash with the standard library's own
// std::make_unique, breaking the build.
#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 9
namespace std
{
    template<typename T, typename... Args>
    std::unique_ptr<T> make_unique(Args&&... args)
    {
        return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
    }
}
#endif
#endif // _BASETYPES_

Просмотреть файл

@ -494,10 +494,19 @@ public:
// check for custom separator character
// If the opening brace is immediately followed by any of the customSeparators,
// change m_separator (inside seps) to that character.
// TODO: document what this is for, where it is used [fseide]
// The parser lets you change the default separator to something else. For example the default separator for an array is usually the : (I think)
// (12:45:23:46)
// However if you are using strings, and one of those strings contains a :, you might want to change the separator to something else:
// (;this;is;a;path:;c:\mydir\stuff)
//
// This will fail for
// (..\dirname,something else)
// Hence there is an ugly fix for it below. This will go away when we replace all configuration parsing by BrainScript.
const static std::string customSeperators = "`~!@$%^&*_-+|:;,?.";
if (customSeperators.find(stringParse[tokenStart]) != npos)
if (customSeperators.find(stringParse[tokenStart]) != npos
&& stringParse.substr(tokenStart).find("..") != 0 && stringParse.substr(tokenStart).find(".\\") != 0 && stringParse.substr(tokenStart).find("./") != 0 // [fseide] otherwise this will nuke leading . or .. in a pathname... Aargh!
)
{
char separator = stringParse[tokenStart];
// this was m_separator; on content level, we change it to a custom separator (it gets changed back when we exit content level)

Просмотреть файл

@ -0,0 +1,30 @@
//
// <copyright file="Exports.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// Exports.cpp : Defines the exported functions for the DLL application.
//
#include "stdafx.h"
#define DATAREADER_EXPORTS
#include "DataReader.h"
#include "ImageReader.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Factory: allocates a new ImageReader of the requested element type.
// The caller takes ownership of the returned object.
template<class ElemType>
void DATAREADER_API GetReader(IDataReader<ElemType>** preader)
{
*preader = new ImageReader<ElemType>();
}
// C-linkage entry points looked up when the reader DLL is loaded at runtime;
// one per element type, since a template cannot cross the C ABI boundary.
extern "C" DATAREADER_API void GetReaderF(IDataReader<float>** preader)
{
GetReader(preader);
}
extern "C" DATAREADER_API void GetReaderD(IDataReader<double>** preader)
{
GetReader(preader);
}
}}}

Просмотреть файл

@ -0,0 +1,470 @@
//
// <copyright company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#include "stdafx.h"
#define DATAREADER_EXPORTS // creating the exports here
#include <algorithm>
#include <fstream>
#include <sstream>
#include <opencv2/opencv.hpp>
#include "DataReader.h"
#include "ImageReader.h"
#include "commandArgUtil.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// Case-insensitive string equality.
// The length check is required: std::equal with only a begin iterator for the
// second range reads past the end of a shorter s2 (undefined behavior) and
// reports a false match when s2 is longer but shares s1 as a prefix.
// Characters are cast to unsigned char before std::tolower, since passing a
// negative plain char is undefined behavior.
static bool AreEqual(const std::string& s1, const std::string& s2)
{
    return s1.size() == s2.size() &&
        std::equal(s1.begin(), s1.end(), s2.begin(),
            [](char a, char b)
            {
                return std::tolower(static_cast<unsigned char>(a)) ==
                       std::tolower(static_cast<unsigned char>(b));
            });
}
//-------------------
// Transforms
// Interface for image pre-processing transforms. ImageReader applies each
// registered transform to every loaded image, in registration order.
class ITransform
{
public:
// Reads the transform's parameters from the reader's configuration section.
virtual void Init(const ConfigParameters& config) = 0;
// Transforms 'mat' in place.
virtual void Apply(cv::Mat& mat) = 0;
ITransform() {};
virtual ~ITransform() {};
public:
// Non-copyable, non-movable: concrete transforms hold per-instance state
// (e.g. RNG pools) that must not be duplicated.
ITransform(const ITransform&) = delete;
ITransform& operator=(const ITransform&) = delete;
ITransform(ITransform&&) = delete;
ITransform& operator=(ITransform&&) = delete;
};
// Crops a square region from the image: centered or randomly positioned,
// optionally with a randomized crop size ("ratio jitter") and a random
// horizontal flip.
class CropTransform : public ITransform
{
public:
// 'seed' is the base seed for the per-caller RNG pool used in Apply().
CropTransform(unsigned int seed) : m_seed(seed)
{
}
// Reads cropType, cropRatio ("r" or "rmin:rmax"), jitterType and hflip.
// hflip defaults to true exactly when cropType is Random.
void Init(const ConfigParameters& config)
{
m_cropType = ParseCropType(config("cropType", ""));
std::stringstream ss{ config("cropRatio", "1") };
std::string token{ "" };
if (std::getline(ss, token, ':'))
{
m_cropRatioMin = std::stof(token);
// Second value after ':' is optional; default max to min.
m_cropRatioMax = std::getline(ss, token, ':') ? std::stof(token) : m_cropRatioMin;
}
if (!(0 < m_cropRatioMin && m_cropRatioMin <= 1.0) ||
!(0 < m_cropRatioMax && m_cropRatioMax <= 1.0) ||
m_cropRatioMin > m_cropRatioMax)
{
RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must <= cropMax");
}
m_jitterType = ParseJitterType(config("jitterType", ""));
if (!config.ExistsCurrent("hflip"))
m_hFlip = m_cropType == CropType::Random;
else
m_hFlip = std::stoi(config("hflip")) != 0;
}
// Crops (and possibly flips) 'mat' in place. Thread-safe: each call borrows
// a private RNG from the pool, creating one from m_seed on first use.
void Apply(cv::Mat& mat)
{
// Local copy so the lambda captures the value, not 'this'.
auto seed = m_seed;
auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique<std::mt19937>(seed); });
double ratio = 1;
switch (m_jitterType)
{
case RatioJitterType::None:
ratio = m_cropRatioMin;
break;
case RatioJitterType::UniRatio:
ratio = UniRealT(m_cropRatioMin, m_cropRatioMax)(*rng);
// NOTE(review): when cropRatioMin == cropRatioMax the distribution returns
// exactly min, and 'ratio < m_cropRatioMax' fails — confirm intended.
assert(m_cropRatioMin <= ratio && ratio < m_cropRatioMax);
break;
default:
// UniLength / UniArea are declared but not implemented yet.
RuntimeError("Jitter type currently not implemented.");
}
mat = mat(GetCropRect(m_cropType, mat.rows, mat.cols, ratio, *rng));
// 50/50 horizontal flip when enabled.
if (m_hFlip && std::bernoulli_distribution()(*rng))
cv::flip(mat, mat, 1);
m_rngs.push(std::move(rng));
}
private:
using UniRealT = std::uniform_real_distribution<double>;
using UniIntT = std::uniform_int_distribution<int>;
enum class CropType { Center = 0, Random = 1 };
// How the crop size is randomized between cropRatioMin and cropRatioMax.
// Only None and UniRatio are handled in Apply().
enum class RatioJitterType
{
None = 0,
UniRatio = 1,
UniLength = 2,
UniArea = 3
};
// Maps the config string to a CropType; empty string means Center.
CropType ParseCropType(const std::string& src)
{
if (src.empty() || AreEqual(src, "center"))
return CropType::Center;
if (AreEqual(src, "random"))
return CropType::Random;
RuntimeError("Invalid crop type: %s.", src.c_str());
}
// Maps the config string to a RatioJitterType; empty string means None.
RatioJitterType ParseJitterType(const std::string& src)
{
if (src.empty() || AreEqual(src, "none"))
return RatioJitterType::None;
if (AreEqual(src, "uniratio"))
return RatioJitterType::UniRatio;
if (AreEqual(src, "unilength"))
return RatioJitterType::UniLength;
if (AreEqual(src, "uniarea"))
return RatioJitterType::UniArea;
RuntimeError("Invalid jitter type: %s.", src.c_str());
}
// Computes the square crop rectangle. cropRatio scales the shorter image
// side; position is centered (Center) or uniformly random (Random).
cv::Rect GetCropRect(CropType type, int crow, int ccol, double cropRatio, std::mt19937& rng)
{
assert(crow > 0);
assert(ccol > 0);
assert(0 < cropRatio && cropRatio <= 1.0);
int cropSize = static_cast<int>(std::min(crow, ccol) * cropRatio);
int xOff = -1;
int yOff = -1;
switch (type)
{
case CropType::Center:
xOff = (ccol - cropSize) / 2;
yOff = (crow - cropSize) / 2;
break;
case CropType::Random:
xOff = UniIntT(0, ccol - cropSize)(rng);
yOff = UniIntT(0, crow - cropSize)(rng);
break;
default:
assert(false);
}
assert(0 <= xOff && xOff <= ccol - cropSize);
assert(0 <= yOff && yOff <= crow - cropSize);
return cv::Rect(xOff, yOff, cropSize, cropSize);
}
private:
unsigned int m_seed;  // base seed for RNGs created into the pool below
conc_stack<std::unique_ptr<std::mt19937>> m_rngs;  // one RNG per concurrent caller
CropType m_cropType;
double m_cropRatioMin;
double m_cropRatioMax;
RatioJitterType m_jitterType;
bool m_hFlip;
};
// Resizes the image to the configured width/height, converting it to the
// floating-point type matching ElemType first (resizing needs float data).
// The interpolation method is picked at random from the configured list.
class ScaleTransform : public ITransform
{
public:
// dataType must be CV_32F or CV_64F; 'seed' seeds the RNG pool used to pick
// an interpolation method per image.
ScaleTransform(int dataType, unsigned int seed) : m_dataType(dataType), m_seed(seed)
{
assert(m_dataType == CV_32F || m_dataType == CV_64F);
m_interpMap.emplace("nearest", cv::INTER_NEAREST);
m_interpMap.emplace("linear", cv::INTER_LINEAR);
m_interpMap.emplace("cubic", cv::INTER_CUBIC);
m_interpMap.emplace("lanczos", cv::INTER_LANCZOS4);
}
// Reads width/height/channels and the ':'-separated "interpolations" list.
// Unknown interpolation names are silently ignored; empty list => linear.
void Init(const ConfigParameters& config)
{
m_imgWidth = config("width");
m_imgHeight = config("height");
m_imgChannels = config("channels");
size_t cfeat = m_imgWidth * m_imgHeight * m_imgChannels;
if (cfeat == 0 || cfeat > std::numeric_limits<size_t>().max() / 2)
RuntimeError("Invalid image dimensions.");
m_interp.clear();
std::stringstream ss{ config("interpolations", "") };
for (std::string token = ""; std::getline(ss, token, ':');)
{
// Explicit cast required for GCC.
std::transform(token.begin(), token.end(), token.begin(), (int (*)(int))std::tolower);
StrToIntMapT::const_iterator res = m_interpMap.find(token);
if (res != m_interpMap.end())
m_interp.push_back((*res).second);
}
if (m_interp.size() == 0)
m_interp.push_back(cv::INTER_LINEAR);
}
// Converts 'mat' to the target float type if needed, then resizes in place
// using a randomly chosen interpolation method. Thread-safe via the RNG pool.
void Apply(cv::Mat& mat)
{
// If matrix has not been converted to the right type, do it now as rescaling requires floating point type.
if (mat.type() != CV_MAKETYPE(m_dataType, m_imgChannels))
mat.convertTo(mat, m_dataType);
// Local copy so the lambda captures the value, not 'this'.
auto seed = m_seed;
auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique<std::mt19937>(seed); });
assert(m_interp.size() > 0);
cv::resize(mat, mat, cv::Size(static_cast<int>(m_imgWidth), static_cast<int>(m_imgHeight)), 0, 0,
m_interp[UniIntT(0, static_cast<int>(m_interp.size()) - 1)(*rng)]);
m_rngs.push(std::move(rng));
}
private:
using UniIntT = std::uniform_int_distribution<int>;
unsigned int m_seed;  // base seed for the RNG pool
conc_stack<std::unique_ptr<std::mt19937>> m_rngs;  // one RNG per concurrent caller
int m_dataType;  // CV_32F or CV_64F, chosen from ElemType by the reader
using StrToIntMapT = std::unordered_map<std::string, int>;
StrToIntMapT m_interpMap;  // config name -> OpenCV interpolation flag
std::vector<int> m_interp;  // allowed interpolation flags (never empty after Init)
size_t m_imgWidth;
size_t m_imgHeight;
size_t m_imgChannels;
};
// Subtracts a precomputed mean image (loaded from an OpenCV FileStorage file)
// from each input image. A missing/empty meanFile makes this a no-op.
class MeanTransform : public ITransform
{
public:
MeanTransform()
{
}
// Loads the mean image from config("meanFile"). The file stores the pixel
// data ("MeanImg") plus its dimensions ("Channel"/"Row"/"Col"), which are
// validated against the data and used to reshape it.
void Init(const ConfigParameters& config)
{
std::wstring meanFile = config(L"meanFile", L"");
if (meanFile.empty())
m_meanImg.release();
else
{
cv::FileStorage fs;
// REVIEW alexeyk: this sort of defeats the purpose of using wstring at all...
auto fname = msra::strfun::utf8(meanFile);
fs.open(fname, cv::FileStorage::READ);
if (!fs.isOpened())
RuntimeError("Could not open file: " + fname);
fs["MeanImg"] >> m_meanImg;
int cchan;
fs["Channel"] >> cchan;
int crow;
fs["Row"] >> crow;
int ccol;
fs["Col"] >> ccol;
if (cchan * crow * ccol != m_meanImg.channels() * m_meanImg.rows * m_meanImg.cols)
RuntimeError("Invalid data in file: " + fname);
fs.release();
m_meanImg = m_meanImg.reshape(cchan, crow);
}
}
// Subtracts the mean in place when a mean image is loaded and its size
// matches; silently does nothing otherwise (the assert documents the
// only expected mismatch: no mean image loaded at all).
void Apply(cv::Mat& mat)
{
assert(m_meanImg.size() == cv::Size(0, 0) || (m_meanImg.size() == mat.size() && m_meanImg.channels() == mat.channels()));
// REVIEW alexeyk: check type conversion (float/double).
if (m_meanImg.size() == mat.size())
mat = mat - m_meanImg;
}
private:
cv::Mat m_meanImg;  // empty => transform disabled
};
//-------------------
// ImageReader
// Constructs the reader with its fixed transform pipeline: crop, then scale,
// then mean subtraction. All transforms share the initial seed 0 until
// SetRandomSeed is called.
template<class ElemType>
ImageReader<ElemType>::ImageReader() : m_seed(0), m_rng(m_seed), m_imgListRand(true), m_pMBLayout(make_shared<MBLayout>())
{
m_transforms.push_back(std::make_unique<CropTransform>(m_seed));
// Pixel type follows ElemType: float -> CV_32F, double -> CV_64F.
m_transforms.push_back(std::make_unique<ScaleTransform>(sizeof(ElemType) == 4 ? CV_32F : CV_64F, m_seed));
m_transforms.push_back(std::make_unique<MeanTransform>());
}
// Out-of-line destructor: required here because the header only forward-
// declares ITransform, so unique_ptr<ITransform> cannot be destroyed there.
template<class ElemType>
ImageReader<ElemType>::~ImageReader()
{
}
// Reads the reader configuration: locates the feature section (the one with a
// "width" parameter) and the label section (the one with "labelDim"), sets up
// the transforms, and loads the image list from the tab-delimited map file
// given by config("file") (columns: image path, class id).
template<class ElemType>
void ImageReader<ElemType>::Init(const ConfigParameters& config)
{
using SectionT = std::pair<std::string, ConfigParameters>;
// Finds the first sub-section containing 'paramName'; errors if none.
// (Name "gettter" is a typo kept as-is; it is local to this function.)
auto gettter = [&](const std::string& paramName) -> SectionT
{
auto sect = std::find_if(config.begin(), config.end(),
[&](const std::pair<std::string, ConfigValue>& p) { return ConfigParameters(p.second).ExistsCurrent(paramName); });
if (sect == config.end())
RuntimeError("ImageReader requires " + paramName + " parameter.");
return{ (*sect).first, ConfigParameters((*sect).second) };
};
// REVIEW alexeyk: currently support only one feature and label section.
SectionT featSect{ gettter("width") };
m_featName = msra::strfun::utf16(featSect.first);
// REVIEW alexeyk: w, h and c will be read again in ScaleTransform.
size_t w = featSect.second("width");
size_t h = featSect.second("height");
size_t c = featSect.second("channels");
m_featDim = w * h * c;
// Initialize transforms.
for (auto& t: m_transforms)
t->Init(featSect.second);
SectionT labSect{ gettter("labelDim") };
m_labName = msra::strfun::utf16(labSect.first);
m_labDim = labSect.second("labelDim");
std::string mapPath = config("file");
std::ifstream mapFile(mapPath);
if (!mapFile)
RuntimeError("Could not open " + mapPath + " for reading.");
std::string line{ "" };
for (size_t cline = 0; std::getline(mapFile, line); cline++)
{
std::stringstream ss{ line };
std::string imgPath;
std::string clsId;
if (!std::getline(ss, imgPath, '\t') || !std::getline(ss, clsId, '\t'))
// NOTE(review): 'cline' is size_t but formatted with %d — may print
// garbage on 64-bit platforms; confirm RuntimeError's format handling.
RuntimeError("Invalid map file format, must contain 2 tab-delimited columns: %s, line: %d.", mapPath.c_str(), cline);
files.push_back({ imgPath, std::stoi(clsId) });
}
// randomize: "auto" (default) shuffles the image list each epoch; "none"
// keeps file order. Any other value is rejected.
std::string rand = config("randomize", "auto");
if (AreEqual(rand, "none"))
m_imgListRand = false;
else if (!AreEqual(rand, "auto"))
RuntimeError("Only Auto and None are currently supported.");
m_epochStart = 0;
m_mbStart = 0;
}
// Nothing to tear down explicitly; all members clean up in the destructor.
template<class ElemType>
void ImageReader<ElemType>::Destroy()
{
}
// Prepares one epoch: optionally reshuffles the image list, computes the
// epoch window [m_epochStart, m_epochStart + m_epochSize) over 'files', and
// sizes the feature/label staging buffers for one minibatch.
template<class ElemType>
void ImageReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples)
{
assert(mbSize > 0);
assert(requestedEpochSamples > 0);
if (m_imgListRand)
std::shuffle(files.begin(), files.end(), m_rng);
// requestDataSize means "use the whole list as one epoch".
m_epochSize = (requestedEpochSamples == requestDataSize ? files.size() : requestedEpochSamples);
m_mbSize = mbSize;
// REVIEW alexeyk: if user provides epoch size explicitly then we assume epoch size is a multiple of mbsize, is this ok?
assert(requestedEpochSamples == requestDataSize || (m_epochSize % m_mbSize) == 0);
m_epoch = epoch;
m_epochStart = m_epoch * m_epochSize;
// Wrap around when the requested epoch starts past the end of the list.
// NOTE(review): m_mbStart is only reset on this wrap path, not when the
// epoch starts within range — confirm callers always advance epochs in order.
if (m_epochStart >= files.size())
{
m_epochStart = 0;
m_mbStart = 0;
}
m_featBuf.resize(m_mbSize * m_featDim);
m_labBuf.resize(m_mbSize * m_labDim);
}
// Fills one minibatch: loads each image, runs the transform pipeline, packs
// pixels into m_featBuf and one-hot labels into m_labBuf, then copies both
// into the caller's matrices. Returns false when the epoch is exhausted.
template<class ElemType>
bool ImageReader<ElemType>::GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices)
{
assert(matrices.size() > 0);
assert(matrices.find(m_featName) != matrices.end());
assert(m_mbSize > 0);
// NOTE(review): m_labName is not asserted present; operator[] would insert
// a null pointer and the dereference below would crash — confirm callers
// always pass a label matrix.
Matrix<ElemType>& features = *matrices[m_featName];
Matrix<ElemType>& labels = *matrices[m_labName];
if (m_mbStart >= files.size() || m_mbStart >= m_epochStart + m_epochSize)
return false;
// Clamp the last (possibly partial) minibatch to the end of the file list.
size_t mbLim = m_mbStart + m_mbSize;
if (mbLim > files.size())
mbLim = files.size();
std::fill(m_labBuf.begin(), m_labBuf.end(), static_cast<ElemType>(0));
// Images are decoded and transformed in parallel; each iteration writes to
// disjoint slices of m_featBuf/m_labBuf.
#pragma omp parallel for ordered schedule(dynamic)
for (long long i = 0; i < static_cast<long long>(mbLim - m_mbStart); i++)
{
const auto& p = files[i + m_mbStart];
cv::Mat img{ cv::imread(p.first, cv::IMREAD_COLOR) };
for (auto& t: m_transforms)
t->Apply(img);
assert(img.isContinuous());
auto data = reinterpret_cast<ElemType*>(img.ptr());
std::copy(data, data + m_featDim, m_featBuf.begin() + m_featDim * i);
// One-hot label: p.second is the class id from the map file.
m_labBuf[m_labDim * i + p.second] = 1;
}
size_t mbSize = mbLim - m_mbStart;
features.SetValue(m_featDim, mbSize, m_featBuf.data(), matrixFlagNormal);
labels.SetValue(m_labDim, mbSize, m_labBuf.data(), matrixFlagNormal);
// Frame-mode layout: mbSize parallel sequences of one frame each.
m_pMBLayout->Init(mbSize, 1, false);
m_mbStart = mbLim;
return true;
}
// Legacy IDataReader end-of-data query.
template<class ElemType>
bool ImageReader<ElemType>::DataEnd(EndDataType endDataType)
{
bool ret = false;
switch (endDataType)
{
case endDataNull:
assert(false);
break;
case endDataEpoch:
// NOTE(review): this returns true while m_mbStart is still INSIDE the
// epoch window, which reads as the opposite of "end of epoch". It may be
// intentional legacy DataEnd semantics — confirm against other readers.
ret = m_mbStart < m_epochStart + m_epochSize;
break;
case endDataSet:
// True once the whole image list has been consumed.
ret = m_mbStart >= files.size();
break;
case endDataSentence:
// Frame mode: every minibatch boundary is a "sentence" boundary.
ret = true;
break;
}
return ret;
}
// Reseeds the list-shuffling RNG.
// NOTE(review): the transforms were constructed with the ORIGINAL m_seed in
// the constructor and keep their own copy, so this does not reseed their RNG
// pools — confirm whether that is intended.
template<class ElemType>
void ImageReader<ElemType>::SetRandomSeed(unsigned int seed)
{
m_seed = seed;
m_rng.seed(m_seed);
}
// Explicit instantiations for the two element types exported by the DLL.
template class ImageReader<double>;
template class ImageReader<float>;
}}}

Просмотреть файл

@ -0,0 +1,68 @@
//
// <copyright company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include <random>
#include <memory>
#include "DataReader.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// REVIEW alexeyk: can't put it into ImageReader itself as ImageReader is a template.
class ITransform;
// Frame-mode reader that loads images listed in a tab-delimited map file
// (columns: image path, class id), applies a crop/scale/mean-subtract
// transform pipeline, and serves dense feature + one-hot label minibatches.
template<class ElemType>
class ImageReader : public IDataReader<ElemType>
{
public:
ImageReader();
virtual ~ImageReader();
// Non-copyable, non-movable: owns transforms and buffers tied to 'this'.
ImageReader(const ImageReader&) = delete;
ImageReader& operator=(const ImageReader&) = delete;
ImageReader(ImageReader&&) = delete;
ImageReader& operator=(ImageReader&&) = delete;
void Init(const ConfigParameters& config) override;
void Destroy() override;
void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize) override;
bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices) override;
bool DataEnd(EndDataType endDataType) override;
size_t GetNumParallelSequences() override { return m_pMBLayout->GetNumParallelSequences(); }
void CopyMBLayoutTo(MBLayoutPtr pMBLayout) override { pMBLayout->CopyFrom(m_pMBLayout); };
void SetRandomSeed(unsigned int seed) override;
private:
unsigned int m_seed;           // seed for m_rng (transforms keep their own copy)
std::mt19937 m_rng;            // shuffles the image list each epoch
std::vector<std::unique_ptr<ITransform>> m_transforms;  // applied in order per image
std::wstring m_featName;       // name of the feature section/matrix
std::wstring m_labName;        // name of the label section/matrix
size_t m_featDim;              // width * height * channels
size_t m_labDim;               // number of classes (one-hot labels)
using StrIntPairT = std::pair<std::string, int>;
std::vector<StrIntPairT> files;  // (image path, class id) from the map file
size_t m_epochSize;            // samples per epoch
size_t m_mbSize;               // samples per minibatch
size_t m_epoch;                // current epoch index
size_t m_epochStart;           // index into 'files' where the epoch begins
size_t m_mbStart;              // index into 'files' where the next minibatch begins
std::vector<ElemType> m_featBuf;  // staging buffer for one minibatch of features
std::vector<ElemType> m_labBuf;   // staging buffer for one minibatch of labels
bool m_imgListRand;            // whether to shuffle 'files' each epoch
MBLayoutPtr m_pMBLayout;       // frame-mode layout handed to the network
};
}}}

Просмотреть файл

@ -0,0 +1,147 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{9BD0A746-0BBD-45B6-B81C-053F03C26CFB}</ProjectGuid>
<SccProjectName>
</SccProjectName>
<SccAuxPath>
</SccAuxPath>
<SccLocalPath>
</SccLocalPath>
<SccProvider>
</SccProvider>
<Keyword>Win32Proj</Keyword>
<RootNamespace>ImageReader</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Choose>
<When Condition="Exists('$(OPENCV_PATH)')">
<PropertyGroup>
<HasOpenCV>true</HasOpenCV>
</PropertyGroup>
</When>
<Otherwise>
<PropertyGroup>
<HasOpenCV>false</HasOpenCV>
</PropertyGroup>
</Otherwise>
</Choose>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IncludePath>..\..\common\include;..\..\math\math;$(OPENCV_PATH)\include;$(IncludePath);</IncludePath>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(OPENCV_PATH)\x64\vc12\lib;$(LibraryPath);</LibraryPath>
<IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
<OpenCVLib Condition="$(HasOpenCV)">opencv_world300.lib</OpenCVLib>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<!-- Compile/link settings shared by all configurations. -->
<ItemDefinitionGroup>
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<!-- NOTE(review): UCIREADER_EXPORTS looks copied from the UCIFastReader project;
     verify whether this reader's export macro should be IMAGEREADER_EXPORTS. -->
<PreprocessorDefinitions>WIN32;_WINDOWS;_USRDLL;UCIREADER_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<TreatWarningAsError>true</TreatWarningAsError>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>CNTKMath.lib;$(OpenCVLib);%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<PostBuildEvent>
<!-- Copy the OpenCV runtime next to the output binary (only when OpenCV is present). -->
<Command Condition="$(HasOpenCV)">xcopy /Y $(OPENCV_PATH)\x64\vc12\bin\opencv_world300.dll $(TargetDir)</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="..\..\Common\Include\basetypes.h" />
<ClInclude Include="..\..\Common\Include\DataReader.h" />
<ClInclude Include="..\..\Common\Include\File.h" />
<ClInclude Include="..\..\Common\Include\fileutil.h" />
<ClInclude Include="ImageReader.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\ConfigFile.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\File.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\DebugUtil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="dllmain.cpp" />
<ClCompile Include="Exports.cpp">
<ExcludedFromBuild Condition="!$(HasOpenCV)">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="ImageReader.cpp">
<ExcludedFromBuild Condition="!$(HasOpenCV)">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="stdafx.cpp">
<PrecompiledHeader>Create</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
<Target Name="CheckDependencies">
<Warning Condition="!$(HasOpenCV)"
Text="ImageReader requires OpenCV library v3.0 or higher to build. Please install the library from http://opencv.org/downloads.html and set OPENCV_PATH environment variable to OpenCV build folder (e.g. C:\src\opencv\build)." />
</Target>
</Project>

Просмотреть файл

@ -0,0 +1,46 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="Exports.cpp" />
<ClCompile Include="stdafx.cpp" />
<ClCompile Include="..\..\Common\DataReader.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\ConfigFile.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\fileutil.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\File.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="dllmain.cpp" />
<ClCompile Include="ImageReader.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
<ClInclude Include="..\..\Common\Include\basetypes.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\DataReader.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\File.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\fileutil.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="ImageReader.h" />
</ItemGroup>
<ItemGroup>
<Filter Include="Common">
<UniqueIdentifier>{0D0EFA10-72A8-4078-840A-B7F76AFEC0A4}</UniqueIdentifier>
</Filter>
<Filter Include="Common\Include">
<UniqueIdentifier>{C6F55578-121A-4D7C-8F57-4172BC5C463B}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>

Просмотреть файл

@ -0,0 +1,21 @@
//
// <copyright file="dllmain.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// dllmain.cpp : Defines the entry point for the DLL application.
#include "stdafx.h"
// Standard Win32 DLL entry point. This reader performs no per-process or
// per-thread setup/teardown, so every attach/detach notification is simply
// acknowledged with success.
BOOL APIENTRY DllMain(HMODULE /*hModule*/, DWORD ul_reason_for_call, LPVOID /*lpReserved*/)
{
    (void)ul_reason_for_call; // all notification reasons are no-ops here
    return TRUE;
}

Просмотреть файл

@ -0,0 +1,8 @@
// stdafx.cpp : source file that includes just the standard includes
// ImageReader.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
#include "stdafx.h"
// TODO: reference any additional headers you need in STDAFX.H
// and not in this file

Просмотреть файл

@ -0,0 +1,17 @@
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#pragma once
#include "Platform.h"
// Must appear before any CRT header below is pulled in.
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms
#include "targetver.h"
#ifdef __WINDOWS__
// NOTE(review): system headers are conventionally included as <windows.h>;
// the quoted form also works but searches local directories first.
#include "windows.h"
#endif
#include <stdio.h>
#include <math.h>
// TODO: reference additional headers your program requires here

Просмотреть файл

@ -0,0 +1,9 @@
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
// Guarded so that this header is a no-op on non-Windows builds.
#ifdef __WINDOWS__
#include <SDKDDKVer.h>
#endif

Просмотреть файл

@ -0,0 +1,102 @@
WorkDir=.
ModelDir=$WorkDir$/_out/$ConfigName$
stderr=$WorkDir$/_out/$ConfigName$
ndlMacros=$WorkDir$/Macros.ndl
precision=float
deviceId=Auto
command=Train:AddTop5Eval:Test
Train=[
action=train
modelPath=$ModelDir$/AlexNet
traceLevel=1
NDLNetworkBuilder=[
networkDescription=$WorkDir$/AlexNet.ndl
]
SGD=[
epochSize=0
minibatchSize=128
learningRatesPerMB=0.01*20:0.003*12:0.001*28:0.0003
momentumPerMB=0.9
maxEpochs=90
gradUpdateType=None
L2RegWeight=0.0005
dropoutRate=0*5:0.5
numMBsToShowResult=10
]
reader=[
readerType=ImageReader
# Map file which maps images to labels using the following format:
# <full path to image><tab><numerical label (0-based class id)>
# Example:
# C:\Data\ImageNet\2012\train\n01440764\n01440764_10026.JPEG<tab>0
file=$WorkDir$/train_map.txt
# Randomize images before every epoch. Possible values: None, Auto. Default: Auto.
randomize=Auto
features=[
# Below are the required parameters.
width=224
height=224
channels=3
# Below are the optional parameters.
# Possible values: Center, Random. Default: Center
cropType=Random
# Horizontal random flip, will be enabled by default if cropType=Random
#hflip=0
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.9
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
jitterType=UniRatio
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
interpolations=Linear
# Stores mean values for each pixel in OpenCV matrix XML format.
meanFile=$WorkDir$/ImageNet1K_mean.xml
]
labels=[
labelDim=1000
]
]
]
AddTop5Eval=[
action=edit
CurModel=$ModelDir$/AlexNet
NewModel=$ModelDir$/AlexNet.Top5
editPath=$WorkDir$/add_top5_layer.mel
]
Test=[
action=test
modelPath=$ModelDir$/AlexNet.Top5
# Set minibatch size for testing.
minibatchSize=128
NDLNetworkBuilder=[
networkDescription=$WorkDir$/AlexNet.ndl
]
reader=[
readerType=ImageReader
file=$WorkDir$/val_map.txt
randomize=None
features=[
width=224
height=224
channels=3
cropType=Center
meanFile=$WorkDir$/ImageNet1K_mean.xml
]
labels=[
labelDim=1000
]
]
]

Просмотреть файл

@ -0,0 +1,108 @@
# AlexNet-style network description. Macro definitions (ConvReLULayer etc.)
# are loaded from Macros.ndl via ndlMacros in the config.
load=ndlMnistMacros
run=DNN
# Input dimensions, label dimension, and per-layer initialization constants.
ndlMnistMacros = [
ImageW = 224
ImageH = 224
ImageC = 3
LabelDim = 1000
features = ImageInput(ImageW, ImageH, ImageC, tag = feature)
labels = Input(LabelDim, tag = label)
conv1WScale = 0.95
conv1BValue = 0
conv2WScale = 2
conv2BValue = 1
conv3WScale = 2.07
conv3BValue = 0
conv4WScale = 2.9
conv4BValue = 1
conv5WScale = 2.4
conv5BValue = 1
fc1WScale = 6.4
fc1BValue = 1
fc2WScale = 3.2
fc2BValue = 1
fc3WScale = 3.2
fc3BValue = 1
]
DNN=[
# conv1
kW1 = 11
kH1 = 11
cMap1 = 64
hStride1 = 3
vStride1 = 3
# weight[cMap1, kW1 * kH1 * ImageC]  (363 = 11*11*3)
conv1_act = ConvReLULayer(features, cMap1, 363, kW1, kH1, hStride1, vStride1, conv1WScale, conv1BValue)
# pool1
pool1W = 3
pool1H = 3
pool1hStride = 2
pool1vStride = 2
pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride)
# conv2
kW2 = 5
kH2 = 5
cMap2 = 192
hStride2 = 1
vStride2 = 1
# weight[cMap2, kW2 * kH2 * cMap1]  (1600 = 5*5*64)
conv2_act = ConvReLULayer(pool1, cMap2, 1600, kW2, kH2, hStride2, vStride2, conv2WScale, conv2BValue)
# pool2
pool2W = 3
pool2H = 3
pool2hStride = 2
pool2vStride = 2
pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride)
# conv3
kW3 = 3
kH3 = 3
cMap3 = 384
hStride3 = 1
vStride3 = 1
# weight[cMap3, kW3 * kH3 * cMap2]  (1728 = 3*3*192)
conv3_act = ConvReLULayer(pool2, cMap3, 1728, kW3, kH3, hStride3, vStride3, conv3WScale, conv3BValue)
# conv4
kW4 = 3
kH4 = 3
cMap4 = 256
hStride4 = 1
vStride4 = 1
# weight[cMap4, kW4 * kH4 * cMap3]  (3456 = 3*3*384)
conv4_act = ConvReLULayer(conv3_act, cMap4, 3456, kW4, kH4, hStride4, vStride4, conv4WScale, conv4BValue)
# conv5
kW5 = 3
kH5 = 3
cMap5 = 256
hStride5 = 1
vStride5 = 1
# weight[cMap5, kW5 * kH5 * cMap4]  (2304 = 3*3*256)
conv5_act = ConvReLULayer(conv4_act, cMap5, 2304, kW5, kH5, hStride5, vStride5, conv5WScale, conv5BValue)
# pool3
pool3W = 3
pool3H = 3
pool3hStride = 2
pool3vStride = 2
pool3 = MaxPooling(conv5_act, pool3W, pool3H, pool3hStride, pool3vStride)
# Fully-connected layers with dropout.
# NOTE(review): 16384 is the flattened pool3 output count — presumably
# 8*8*cMap5 given the strides above; confirm against the conv/pool geometry.
hiddenDim = 4096
h1 = DNNReLULayer(16384, hiddenDim, pool3, fc1WScale, fc1BValue)
h1_d = Dropout(h1)
h2 = DNNReLULayer(hiddenDim, hiddenDim, h1_d, fc2WScale, fc2BValue)
h2_d = Dropout(h2)
# NOTE(review): 'labelDim' differs in case from 'LabelDim' defined above —
# relies on case-insensitive name lookup; confirm this resolves as intended.
ol = DNNLastLayer(hiddenDim, labelDim, h2_d, fc3WScale, fc3BValue)
CE = CrossEntropyWithSoftmax(labels, ol, tag = Criteria)
Err = ErrorPrediction(labels, ol, tag = Eval)
OutputNodes = ol
]

Просмотреть файл

@ -0,0 +1,25 @@
# Convolution + bias + ReLU.
#   inp       - input node
#   outMap    - number of output feature maps
#   inWCount  - weight columns per output map (kW * kH * input channels,
#               precomputed by the caller)
#   kW, kH    - kernel width/height
#   hStride, vStride - convolution strides
#   wScale    - initValueScale for the Gaussian weight initialization
#   bValue    - constant bias initialization value
ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue)
{
convW = Parameter(outMap, inWCount, init = Gaussian, initValueScale = wScale)
conv = Convolution(convW, inp, kW, kH, outMap, hStride, vStride, zeroPadding = true)
convB = Parameter(outMap, 1, init = fixedValue, value = bValue)
convPlusB = Plus(conv, convB);
act = RectifiedLinear(convPlusB);
}
# Fully-connected layer: y = ReLU(W * x + b).
#   inDim/outDim - input/output dimensions of the weight matrix
#   x            - input node
#   wScale       - initValueScale for the Gaussian weight initialization
#   bValue       - constant bias initialization value
DNNReLULayer(inDim, outDim, x, wScale, bValue)
{
W = Parameter(outDim, inDim, init = Gaussian, initValueScale = wScale)
b = Parameter(outDim, init = fixedValue, value = bValue)
t = Times(W, x)
z = Plus(t, b)
y = RectifiedLinear(z)
}
# Final fully-connected layer: z = W * x + b, with no nonlinearity —
# the softmax is applied by the criterion node (CrossEntropyWithSoftmax).
DNNLastLayer(hiddenDim, labelDim, x, wScale, bValue)
{
W = Parameter(labelDim, hiddenDim, init = Gaussian, initValueScale = wScale)
b = Parameter(labelDim, init = fixedValue, value = bValue)
t = Times(W, x)
z = Plus(t, b)
}

Просмотреть файл

@ -0,0 +1,6 @@
# Model-edit script: loads the trained model, appends a top-5 error node
# (ErrorPrediction with a constant K=5 third input), and saves the result
# under a new name for use by the Test command.
m1=LoadModel($CurModel$, format=cntk)
SetDefaultModel(m1)
ErrTop5 = ErrorPrediction(labels, OutputNodes.z, Const(5), tag = Eval)
SaveModel(m1, $NewModel$, format=cntk)

Просмотреть файл

@ -1505,8 +1505,8 @@ int wmain1(int argc, wchar_t* argv[]) // called from wmain which is a wrapper
//dump config info
fprintf(stderr, "running on %s at %s\n", GetHostName().c_str(), timestamp.c_str());
fprintf(stderr, "command line options: \n");
for (int i = 1; i < argc; i++)
fprintf(stderr, "command line: \n");
for (int i = 0; i < argc; i++)
{
fprintf(stderr, "%s ", WCharToString(argv[i]).c_str());
}

Просмотреть файл

@ -40,7 +40,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
public:
ParallelNode(DEVICEID_TYPE deviceId, const wstring & name) :
Base(deviceId, name)
{}
{ }
virtual void ComputeInputPartial(const size_t inputIndex)
{
@ -201,13 +201,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class PreComputedNode : public ComputationNodeNonLooping/*ComputationNode*/<ElemType>
{
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembers;
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembers; using Base::OperationName;
public:
//virtual ComputationNodeBase * NewThis(DEVICEID_TYPE deviceId, const wstring & name) = 0;
PreComputedNode(DEVICEID_TYPE deviceId, const wstring & name) :
Base(deviceId, name),
m_hasComputed(false)
{}
{ }
// interface through which this node is operated on are these two functions
@ -234,7 +234,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void LoadFromFile(File& fstream, size_t modelVersion) override
{
Base::LoadFromFile(fstream, modelVersion);
fstream >> m_hasComputed;
CreateMatrixIfNull(m_functionValues);
fstream >> FunctionValues();
@ -257,11 +256,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
Base::Validate(isFinalValidationPass);
if (!Inputs(0)->HasMBLayout())
InvalidArgument("%ls %ls operation requires its input to come in minibatches of samples.", NodeName().c_str(), OperationName().c_str());
m_pMBLayout = nullptr; // this node does not hold mini-batch data
if (!m_hasComputed) // this node retains state, and state gets destroyed by Resize(), so we must be careful
Resize(Inputs(0)->GetNumRows(), 1);
else
VerifySize(Inputs(0)->GetNumRows(), 1);
m_pMBLayout = nullptr; // this node does not hold mini-batch data
InferImageDimsFromInputs();
}
@ -373,16 +374,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (!IsAccumulating())
LogicError("%ls %ls operation: MarkComputed(false) has not been called.", NodeName().c_str(), OperationName().c_str());
Matrix<ElemType> &samples = Inputs(0)->FunctionValues();
Matrix<ElemType> &avg = FunctionValues();
// set gaps to zero, since we are reducing in time
Inputs(0)->MaskMissingValuesColumnsToZero();
auto & samples = Inputs(0)->FunctionValues();
auto & avg = FunctionValues();
#if 1//NANCHECK
samples.HasNan("Mean-Samples");
#endif
size_t numNewSamples = samples.GetNumCols();
Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / (m_numSamples + samples.GetNumCols()), samples, false,
ConstOnes(numNewSamples, 1, samples.GetDeviceId()),
false, (ElemType)m_numSamples / (m_numSamples + numNewSamples), avg);
size_t numNewSamples = Inputs(0)->GetMBLayout()->DetermineActualNumSamples();
size_t totalNumSamples = m_numSamples + numNewSamples;
if (totalNumSamples == 0) totalNumSamples = 1; // 0/0=1 in this context
Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / totalNumSamples, samples, false,
ConstOnes(samples.GetNumCols(), 1, samples.GetDeviceId()),
false, (ElemType)m_numSamples / totalNumSamples, avg);
#if 1//NANCHECK
avg.HasNan("Mean-avg");
#endif
@ -453,15 +459,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (!IsAccumulating())
LogicError("%ls %ls operation: MarkComputed(false) has not been called.", NodeName().c_str(), OperationName().c_str());
Matrix<ElemType> &samples = Inputs(0)->FunctionValues();
// set gaps to zero, since we are reducing in time
Inputs(0)->MaskMissingValuesColumnsToZero();
auto & samples = Inputs(0)->FunctionValues();
#if 1//NANCHECK
samples.HasNan("InvStdDev-Samples");
#endif
m_temp.SetValue(m_mean);
size_t numNewSample = samples.GetNumCols();
Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / (m_numSamples + numNewSample), samples, false,
ConstOnes(numNewSample, 1, samples.GetDeviceId()),
false, (ElemType)m_numSamples / (m_numSamples + numNewSample), m_mean);
size_t numNewSamples = Inputs(0)->GetMBLayout()->DetermineActualNumSamples();
size_t totalNumSamples = m_numSamples + numNewSamples;
if (totalNumSamples == 0) totalNumSamples = 1; // 0/0=1 in this context
Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / totalNumSamples, samples, false,
ConstOnes(samples.GetNumCols(), 1, samples.GetDeviceId()),
false, (ElemType)m_numSamples / totalNumSamples, m_mean);
m_temp -= m_mean;
m_temp.AssignElementPowerOf(m_temp, 2);
@ -470,9 +481,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_temp.AssignDifferenceOf(samples, m_mean);
m_temp.AssignElementPowerOf(m_temp, 2);
Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / (m_numSamples + numNewSample), m_temp, false,
ConstOnes(numNewSample, 1, samples.GetDeviceId()),
false, (ElemType)m_numSamples / (m_numSamples + numNewSample), m_var);
Matrix<ElemType>::MultiplyAndWeightedAdd(1.0f / totalNumSamples, m_temp, false,
ConstOnes(samples.GetNumCols(), 1, samples.GetDeviceId()),
false, (ElemType)m_numSamples / totalNumSamples, m_var);
#if 1//NANCHECK
m_var.HasNan("InvStdDev-m_var");

Просмотреть файл

@ -330,11 +330,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
// helper functions for common cases
private:
ComputationNodeBasePtr Inputs(size_t index) const { return m_children[index]; } // TODO: delete this; change to m_children
// determine number of columns from a child and/or layout
// determine number of columns from a child and/or layout
size_t DetermineNumCols(const ComputationNodeBasePtr & child) const
{
size_t childCols = child->GetNumCols(); // this is what the child says
@ -495,7 +493,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
pair<ImageLayout, ImageLayout> GetImageLayouts() const { return make_pair(m_inputImageLayout, m_outputImageLayout); }
const size_t ChildrenSize() const { return m_children.size(); }
const size_t ChildrenSize() const { return m_children.size(); } // TODO: rename to NumChildren() or NumInputs(); and inside here where we use m_children, use m_children.size() as well
virtual void SetInput(const size_t childIndex, const ComputationNodeBasePtr& node) = 0;
@ -515,7 +513,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void MaskMissingGradientColumnsToZero() = 0;
virtual void MaskMissingGradientColumnsToZero(const size_t timeIdxInSeq) = 0; // TODO: don't we need a FrameRange here, too?
// indicates whether special handling is needed.The standard handleing will be just mask the function values after the evalaution and mask the gradient before gradiant computation for the children. this is not valid for all criterion nodes whose result is a scalar.
// overridden to return true by training/eval criteria (and the soon-to-be-deprecated PairNetworkNode, LSTMNode)
// The need for this seems an artifact of the old inconsistent layout architecture. In the future, this can probably just go away.
@ -844,7 +841,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class C, class... _Types> static inline shared_ptr<C> New(DEVICEID_TYPE deviceId, const wstring & name, _Types&&... _Args)
{
auto p = make_shared<C>(deviceId, name, forward<_Types>(_Args)...); // creates objects, esp. assigns deviceId to matrices, but otherwise does nothing
//disable this line. Instead we should make sure matrices are allocated at the right device
//p->MoveMatricesToDevice(deviceId); // this is a virtual call, i.e. it will handle extra matrices an object might own
return p;
@ -931,7 +927,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ReleaseMatrixToPool(m_functionValues, matrixPool);
}
}
virtual void DumpNodeInfo(const bool /*printValues*/, File& fstream) const;
// TODO: similar to DumpInfo; used by ExperimentalNetworkBuilder test implementation
@ -1172,9 +1167,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
inline ComputationNodePtr Inputs(const size_t childIndex) const // TODO: rename to Input
{
#ifdef DEBUG // profile shows this is range check very expensive in release mode, skip it
#ifdef _DEBUG // profile shows this is range check very expensive in release mode, skip it
if (childIndex >= m_children.size())
InvalidArgument ("childIndex is out of range.");
LogicError("Inputs: childIndex is out of range.");
#endif
return UpCast(m_children[childIndex]);
}
@ -1207,7 +1202,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Note: This returns an object, not a reference. That object is a column slice, i.e. a small object that just points into another object.
// TODO: remove FrameRange::samplesInRecurrentStep from FrameRange, as it belongs into pMBLayout. Hence this function that binds both together.
// Note: This is not used anywhere yet, only a sketch how we may further abstract timing.
Matrix<ElemType> DataSlice(Matrix<ElemType>& data,
Matrix<ElemType> DataSlice(Matrix<ElemType> & data,
const FrameRange & frameRange/*select frame or entire batch*/)
{
return DataSlice(data, frameRange, m_pMBLayout);
@ -1250,7 +1245,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return data.ColumnSlice(startColumn + frameRange.seqIndex, 1);
}
}
enum ValueOrGradient { VALUE, GRADIENT };
Matrix<ElemType> DataSlice(ValueOrGradient valueOrGradient/*as it says*/,
const FrameRange & frameRange/*select frame or entire batch*/)
@ -1351,7 +1345,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
child->ClearGradient(true);
child->MarkGradientInitialized(true);
}
ComputeInputPartial(i, FrameRange(timeIdxInSeq)); //this computes partial wrt to the child and sums the gradient value in the child
}
#ifdef DISPLAY_DEBUG
@ -1364,16 +1357,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/*implement*/void ClearGradientForChildren()
{
for (size_t i = 0; i<m_children.size(); i++)
for (size_t i=0; i<m_children.size(); i++)
{
ComputationNodePtr child = Inputs(i);
child->MarkGradientInitialized(false);
}
}
virtual void ClearGradient(const bool clearExistingGradientValue)
{
{
if (NeedGradient())
{
{
//ClearChildGradientComputationFlag();
if (clearExistingGradientValue)
@ -1436,7 +1429,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
matrixPtr = make_shared<Matrix<ElemType>>(m_deviceId);
}
}
//to be called by derived classed if that class needs to print node values
void PrintNodeValuesToFile(const bool printValues, File& fstream) const
{
@ -1577,7 +1569,6 @@ public: \
using Base::RequestMatricesBeforeGradientComp; using Base::ReleaseMatricesAfterGradientComp; \
using Base::Validate; using Base::ValidateUnaryMap; using Base::ValidateBinaryZip; using Base::ValidateUnaryReduce; using Base::ValidateBinaryReduce; using Base::ValidateInferBinaryChildrenDims; using Base::ValidateInferChildDims
#define ComputationNodeBoilerplate \
protected: /* some boilerplate goes here */ \
virtual const std::wstring OperationName() const override { return TypeName(); } \

Просмотреть файл

@ -21,7 +21,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
template<class ElemType>
class ErrorPredictionNode : public ComputationNodeNonLooping/*ComputationNode*/<ElemType>, public NumInputs<2>
class ErrorPredictionNode : public ComputationNodeNonLooping/*ComputationNode*/<ElemType>
{
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"ErrorPrediction"; }
@ -41,16 +41,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
{
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), *m_maxIndexes0, *m_maxIndexes1, *m_maxValues, shared_from_this());
EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), *m_maxIndexes0, *m_maxIndexes1, *m_maxValues, m_topK, shared_from_this());
}
void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues0, const Matrix<ElemType>& inputFunctionValues1, Matrix<ElemType>& maxIndexes0, Matrix<ElemType>& maxIndexes1, Matrix<ElemType>& maxValues, ComputationNodePtr curNode)
void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues0, const Matrix<ElemType>& inputFunctionValues1, Matrix<ElemType>& maxIndexes0, Matrix<ElemType>& maxIndexes1, Matrix<ElemType>& maxValues, int topK, ComputationNodePtr curNode)
{
inputFunctionValues0.VectorMax(maxIndexes0, maxValues, true);
inputFunctionValues1.VectorMax(maxIndexes1, maxValues, true);
inputFunctionValues1.VectorMax(maxIndexes1, maxValues, true, topK);
curNode->MaskMissingColumnsToZero(maxIndexes0, Inputs(0)->GetMBLayout()); // we are fine since it will only be called with full minibatch
curNode->MaskMissingColumnsToZero(maxIndexes1, Inputs(1)->GetMBLayout());
functionValues.AssignNumOfDiff(maxIndexes0, maxIndexes1);
functionValues.AssignNumOfDiff(maxIndexes0, maxIndexes1, topK > 1);
#if NANCHECK
functionValues.HasNan("ErrorPrediction");
#endif
@ -77,18 +77,38 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ValidateInferChildDims(index, rows, cols);
}
m_topK = 1;
if (m_children.size() == 3)
{
if (Inputs(2)->FunctionValues().GetNumRows() != 1 || Inputs(2)->FunctionValues().GetNumCols() != 1)
throw std::logic_error("TopK in ErrorPredictionNode must be a scalar value.");
m_topK = static_cast<int>(Inputs(2)->FunctionValues().Get00Element());
}
//if (Inputs(0)->GetNumRows() == 0 || Inputs(1)->GetNumRows() == 0)
// LogicError("ErrorPrediction operation: one of the operands has 0 elements.");
if (isFinalValidationPass)
{
if (!(Inputs(0)->GetNumRows() == Inputs(1)->GetNumRows() && Inputs(0)->GetNumCols() == Inputs(1)->GetNumCols()))
{
LogicError("The Matrix dimension in the ErrorPrediction operation does not match.");
}
if (((!(Inputs(0)->FunctionValues().GetNumRows() == Inputs(1)->FunctionValues().GetNumRows() && //match size
Inputs(0)->FunctionValues().GetNumCols() == Inputs(1)->FunctionValues().GetNumCols()))) && Inputs(0)->GetLoopId() < 0)
{
LogicError("The Matrix dimension in the ErrorPrediction operation does not match.");
}
}
Resize(1,1);
m_pMBLayout = nullptr; // this node does not hold mini-batch data
InferImageDimsFromInputs();
// resize the temporaries to their proper size
size_t cols = Inputs(0)->GetNumCols();
m_maxIndexes0->Resize(m_topK, cols);
m_maxIndexes1->Resize(m_topK, cols);
m_maxValues->Resize(m_topK, cols);
}
virtual void InferImageDimsFromInputs()
@ -117,7 +137,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
*node->m_maxValues = *m_maxValues;
}
}
//request matrices needed to do node function value evaluation
virtual void RequestMatricesBeforeEval(MatrixPool& matrixPool)
{
@ -136,13 +155,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ReleaseMatrixToPool(m_maxIndexes1, matrixPool);
ReleaseMatrixToPool(m_maxValues, matrixPool);
}
protected:
virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; }
private:
shared_ptr<Matrix<ElemType>> m_maxIndexes0, m_maxIndexes1;
shared_ptr<Matrix<ElemType>> m_maxValues;
int m_topK;
};
template class ErrorPredictionNode<float>;

Просмотреть файл

@ -381,11 +381,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// stacks multiple inputs on top of each other
// -----------------------------------------------------------------------
//this node is used to extract part of the input by rows as the output
// TODO: Really? RowStack indicates something different.
//it has to be continuous segments of rows since each column is treated as one sample
template<class ElemType>
class RowStackNode : public ComputationNode<ElemType> // note: not deriving from NumInputs<> like most other nodes since this one takes a variable number of inputs
class RowStackNode : public ComputationNode<ElemType> // note: not deriving from NumInputs<> like most other nodes, because this one takes a variable number of inputs
{
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"RowStack"; }
@ -397,21 +394,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
{
Base::CopyTo(nodeP, newName, flags);
auto node = dynamic_pointer_cast<RowStackNode<ElemType>>(nodeP);
if (flags & CopyNodeFlags::copyNodeChildren)
{
node->m_children = m_children;
auto node = dynamic_pointer_cast<RowStackNode<ElemType>>(nodeP);
node->m_startRowIndices = m_startRowIndices;
node->m_inputMatrices = m_inputMatrices;
}
}
virtual void ComputeInputPartial(const size_t inputIndex)
{
if (inputIndex >= ChildrenSize())
InvalidArgument("RowStack-ComputeInputPartial: inputIndex out of range.");
ComputeInputPartialS(Inputs(inputIndex)->GradientValues(), GradientValues(), m_startRowIndices[inputIndex], m_startRowIndices[inputIndex + 1] - m_startRowIndices[inputIndex]);
ComputeInputPartialS(Inputs(inputIndex)->GradientValues(), GradientValues(), m_startRowIndices[inputIndex]);
}
virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
@ -419,62 +412,41 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndices[inputIndex], m_startRowIndices[inputIndex+1] - m_startRowIndices[inputIndex]);
ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndices[inputIndex]);
}
/*TODO: merge with call site*/void ComputeInputPartialS(Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const size_t startIndex, const size_t numRows)
/*TODO: merge with call site*/void ComputeInputPartialS(Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const size_t startIndex)
{
inputGradientValues.AddWithRowSliceValuesOf(gradientValues, startIndex, numRows);
}
void EvaluateThisNodeMap() // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
{
EvaluateThisNodeS(FunctionValues(), m_inputMatrices, 0, Inputs(0)->GetNumCols());
inputGradientValues.AddWithRowSliceValuesOf(gradientValues, startIndex, inputGradientValues.GetNumRows());
}
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
{
//if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
Matrix<ElemType> sliceFunctionValues = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
}
// TODO: change to FrameRange
/*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
{
functionValues.AssignRowStackValuesOf(inputMatrices, sliceStartCol, sliceNumCols);
#if NANCHECK
functionValues.HasNan("RowStack");
#endif
for (size_t i = 0; i < ChildrenSize(); i++)
ValueSlice(frameRange).AssignToRowSliceValuesOf(Inputs(i)->ValueSlice(frameRange), m_startRowIndices[i], Inputs(i)->GetNumRows());
}
virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
{
Base::Validate(isFinalValidationPass);
InferMBLayoutFromInputsForStandardCase();
size_t numCols = Inputs(0)->GetNumCols();
m_startRowIndices.resize(ChildrenSize()+1);
m_inputMatrices.resize(ChildrenSize());
// count totalRows and form m_startRowIndices[] array, which is the cumulative sum of matrix heights
m_startRowIndices.resize(ChildrenSize());
size_t totalRows = 0;
m_startRowIndices[0] = 0;
for (int i = 0; i < ChildrenSize(); i++)
{
Matrix<ElemType>& childMatrix = Inputs(i)->FunctionValues();
size_t numRows = childMatrix.GetNumRows();
if (isFinalValidationPass && childMatrix.GetNumCols() != numCols)
if (isFinalValidationPass && Inputs(i)->GetNumCols() != numCols)
LogicError("RowStack operation: the input node %ls has different number of columns.", Inputs(i)->NodeName().c_str());
totalRows += numRows;
m_inputMatrices[i] = &childMatrix;
m_startRowIndices[i + 1] = m_startRowIndices[i] + numRows;
m_startRowIndices[i] = totalRows;
totalRows += Inputs(i)->GetNumRows();
}
Resize(totalRows, numCols);
InferMBLayoutFromInputsForStandardCase();
InferImageDimsFromInputs();
}
@ -489,8 +461,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
private:
std::vector<size_t> m_startRowIndices; //start row number in the stacked matrix of each input (child)
std::vector<const Matrix<ElemType>*> m_inputMatrices;
std::vector<size_t> m_startRowIndices; // start row number in the stacked matrix of each input (child) (cumsum of matrix heights)
};
template class RowStackNode<float>;
@ -1823,7 +1794,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
*node->m_rightGradient = *m_rightGradient;
}
}
//request matrices that are needed for gradient computation
virtual void RequestMatricesBeforeGradientComp(MatrixPool& matrixPool)
{
@ -1839,7 +1809,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ReleaseMatrixToPool(m_innerproduct, matrixPool);
ReleaseMatrixToPool(m_rightGradient, matrixPool);
}
private:
shared_ptr<Matrix<ElemType>> m_innerproduct;
shared_ptr<Matrix<ElemType>> m_rightGradient;
@ -2034,7 +2003,6 @@ private:
*node->m_temp = *m_temp;
}
}
//request matrices needed to do node function value evaluation
virtual void RequestMatricesBeforeEval(MatrixPool& matrixPool)
{
@ -2062,7 +2030,6 @@ private:
ReleaseMatrixToPool(m_rightTerm, matrixPool);
ReleaseMatrixToPool(m_temp, matrixPool);
}
private:
// invNorm nodes tranfer data between EvaluateThisNode and ComputeInputPartial
shared_ptr<Matrix<ElemType>> m_invNorm0;
@ -2437,7 +2404,6 @@ private:
*node->m_temp = *m_temp;
}
}
//request matrices needed to do node function value evaluation
virtual void RequestMatricesBeforeEval(MatrixPool& matrixPool)
{
@ -2467,7 +2433,6 @@ private:
ReleaseMatrixToPool(m_invNormSquare, matrixPool);
ReleaseMatrixToPool(m_temp, matrixPool);
}
private:
// invNorm nodes tranfer data between EvaluateThisNode and ComputeInputPartial
shared_ptr<Matrix<ElemType>> m_invNorm0;

Просмотреть файл

@ -21,8 +21,12 @@
# defaults to acml
# CUDA_PATH= Path to CUDA
# If not specified, GPU will not be enabled
# CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
# defaults to /usr/local/cub-1.4.1
# KALDI_PATH= Path to Kaldi
# If not specified, Kaldi plugins will not be built
# OPENCV_PATH= path to OpenCV 3.0.0 installation, so $(OPENCV_PATH) exists
# defaults to /usr/local/opencv-3.0.0
ifndef BUILD_TOP
BUILD_TOP=.
@ -77,7 +81,12 @@ ifdef CUDA_PATH
ifndef GDK_PATH
$(info defaulting GDK_PATH to /usr)
GDK_PATH=/usr
endif
endif
ifndef CUB_PATH
$(info defaulting CUB_PATH to /usr/local/cub-1.4.1)
CUB_PATH=/usr/local/cub-1.4.1
endif
DEVICE = gpu
@ -85,6 +94,7 @@ endif
# This is a suggested/default location for NVML
INCLUDEPATH+=$(GDK_PATH)/include/nvidia/gdk
INCLUDEPATH+=$(CUB_PATH)
NVMLPATH=$(GDK_PATH)/src/gdk/nvml/lib
# Set up CUDA includes and libraries
@ -367,6 +377,26 @@ $(KALDI2READER): $(KALDI2READER_OBJ) | $(CNTKMATH_LIB)
endif
########################################
# ImageReader plugin
########################################
ifdef OPENCV_PATH
IMAGEREADER_SRC =\
DataReader/ImageReader/Exports.cpp \
DataReader/ImageReader/ImageReader.cpp \
IMAGEREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(IMAGEREADER_SRC))
IMAGEREADER:=$(LIBDIR)/ImageReader.so
ALL += $(IMAGEREADER)
SRC+=$(IMAGEREADER_SRC)
$(IMAGEREADER): $(IMAGEREADER_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ -l$(CNTKMATH)
endif
########################################
# cntk
########################################

Просмотреть файл

@ -597,6 +597,7 @@ namespace CNTKMathTest
M4(1, 0) = 8; M4(1, 1) = 18; M4(1, 2) = 28;
Assert::IsTrue(M2.IsEqualTo(M4, 0.0001));
#if 0
Matrix M5, M6, M7, M8;
M5.AssignRowSliceValuesOf(M0, 0, 2);
M6.AssignRowSliceValuesOf(M0, 2, 1);
@ -610,6 +611,7 @@ namespace CNTKMathTest
M8.AssignRowStackValuesOf(inputMatrices, 0, 3);
Assert::IsTrue(M8.IsEqualTo(M0, 0.0001));
#endif
}
TEST_METHOD(CPUAssignRepeatOf)

Просмотреть файл

@ -316,6 +316,7 @@ namespace CNTKMathTest
GPUMatrix<float> M4(2, 3, fArray4, matrixFlagNormal);
Assert::IsTrue(M2.IsEqualTo(M4, 0.0001));
#if 0
GPUMatrix<float> M5, M6, M7, M8;
M5.AssignRowSliceValuesOf(M0, 0, 2);
M6.AssignRowSliceValuesOf(M0, 2, 1);
@ -329,6 +330,7 @@ namespace CNTKMathTest
M8.AssignRowStackValuesOf(inputMatrices, 0, 3);
Assert::IsTrue(M8.IsEqualTo(M0, 0.0001));
#endif
}
TEST_METHOD(GPUKhatriRaoProduct)

Просмотреть файл

@ -821,5 +821,60 @@ namespace CNTKMathTest
Assert::IsTrue(SingleMatrix::HasElement(m1, posInf));
}
}
TEST_METHOD(MatrixVectorMax)
{
// Matrices are stored as column-major so below is 3x2 matrix.
float src[] = {
1.0f, 3.0f, 4.0f,
6.0f, 2.0f, 5.0f };
float expectedIdx[] = {
2.0f, 1.0f,
0.0f, 2.0f };
float expectedVal[] = {
4.0f, 3.0f,
6.0f, 5.0f };
for (int deviceId : { -1, AUTOPLACEMATRIX })
{
Matrix<float> expIdx(2, 2, expectedIdx, matrixFlagNormal, deviceId);
Matrix<float> expVal(2, 2, expectedVal, matrixFlagNormal, deviceId);
Matrix<float> actual(3, 2, src, matrixFlagNormal, deviceId);
Matrix<float> actualIdx(deviceId);
Matrix<float> actualVal(deviceId);
int topK = 2;
actual.VectorMax(actualIdx, actualVal, true, topK);
Assert::IsTrue(actualIdx.IsEqualTo(expIdx));
Assert::IsTrue(actualVal.IsEqualTo(expVal));
}
}
TEST_METHOD(MatrixAssignNumOfDiff)
{
float labels[] = { 1.0f, 2.0f, 3.0f };
// Matrices are stored as column-major so below is 2x3 matrix.
float topKResults[] = {
1.0f, 3.0f,
4.0f, 6.0f,
2.0f, 3.0f };
for (int deviceId : { -1, AUTOPLACEMATRIX })
{
Matrix<float> lbl(1, 3, labels, matrixFlagNormal, deviceId);
Matrix<float> topKRes(2, 3, topKResults, matrixFlagNormal, deviceId);
Matrix<float> actual(deviceId);
actual.AssignNumOfDiff(lbl, topKRes, true);
float expectedDiff = 1.0;
Assert::AreEqual(expectedDiff, actual.Get00Element());
}
}
};
}

Просмотреть файл

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
@ -24,7 +24,7 @@
</SccProvider>
<ProjectName>CNTKMathCUDA</ProjectName>
<CudaPath>$(CUDA_PATH_V7_0)</CudaPath>
<CudaToolkitCustomDir>$(CUDA_PATH)</CudaToolkitCustomDir>
<CudaToolkitCustomDir>$(CudaPath)</CudaToolkitCustomDir>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
@ -47,7 +47,7 @@
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IncludePath>..\..\common\include;$(ACML_PATH)\include;$(CudaPath)\include;$(IncludePath)</IncludePath>
<IncludePath>..\..\common\include;$(ACML_PATH)\include;$(CudaPath)\include;$(CUB_PATH);$(IncludePath)</IncludePath>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(ACML_PATH)\lib;$(CudaPath)\lib\$(Platform);$(LibraryPath)</LibraryPath>
<IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
</PropertyGroup>
@ -187,4 +187,8 @@
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.0.targets" />
</ImportGroup>
<Target Name="CheckDependencies">
<Error Condition="!Exists('$(CUB_PATH)')"
Text="CNTK requires NVIDIA CUB library v1.4.1 to build. Please download the library from https://nvlabs.github.io/cub/ and set CUB_PATH environment variable to CUB root path (e.g. c:\src\cub-1.4.1)." />
</Target>
</Project>

Просмотреть файл

@ -31,9 +31,7 @@
<ClCompile Include="cudalib.cpp">
<Filter>GPU\SequenceTraining</Filter>
</ClCompile>
<ClCompile Include="DebugUtil.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\DebugUtil.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="CommonMatrix.h" />

Просмотреть файл

@ -20,6 +20,7 @@
#include <exception>
#include <thread>
#include<iostream>
#include <algorithm>
#ifdef _WIN32
#include <Windows.h>
#else
@ -477,6 +478,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return diag;
}
#if 0
//stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object.
template<class ElemType>
CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const CPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
@ -518,6 +520,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this;
}
#endif
template<class ElemType>
void CPUMatrix<ElemType>::MinusOneAt(CPUMatrix<ElemType>& c, const size_t position)
@ -3299,7 +3302,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
//I decided to use CPUMatrix<ElemType>& maxIndexes instead of integer vector because the result may be used to do additional calculation
template<class ElemType>
void CPUMatrix<ElemType>::VectorMax(CPUMatrix<ElemType>& maxIndexes, CPUMatrix<ElemType>& maxValues, const bool isColWise) const
void CPUMatrix<ElemType>::VectorMax(CPUMatrix<ElemType>& maxIndexes, CPUMatrix<ElemType>& maxValues, const bool isColWise, int topK) const
{
if (IsEmpty())
LogicError("VectorMax: Matrix is empty.");
@ -3307,33 +3310,63 @@ namespace Microsoft { namespace MSR { namespace CNTK {
auto& us=*this;
const int m = (int)GetNumRows();
const int n = (int)GetNumCols();
assert(topK <= m);
assert (m>0 && n>0); //converting from size_t to int may cause overflow
if (isColWise) //col-wise
{
maxValues.Resize(1, n);
maxIndexes.Resize(1, n);
maxValues.Resize(topK, n);
maxIndexes.Resize(topK, n);
#pragma omp parallel for
for (int j=0; j<n; j++)
if (topK == 1)
{
ElemType v = us(0, j);
size_t index = 0;
foreach_row(i,us)
#pragma omp parallel for
for (int j = 0; j < n; j++)
{
if (v < us(i,j))
ElemType v = us(0, j);
size_t index = 0;
foreach_row(i, us)
{
index = i;
v = us(i,j);
if (v < us(i, j))
{
index = i;
v = us(i, j);
}
}
maxValues(0, j) = v;
maxIndexes(0, j) = (ElemType)index;
}
}
else
{
std::vector<int> indices(m);
int i = 0;
std::generate(indices.begin(), indices.end(), [&i] { return i++; });
const ElemType* curVal = m_pArray;
ElemType* curIdx = maxIndexes.m_pArray;
ElemType* curMax = maxValues.m_pArray;
for (int icol = 0; icol < n; icol++, curVal += m, curIdx += topK, curMax += topK)
{
// Partial sort, descending order.
std::nth_element(indices.begin(), indices.begin() + topK, indices.end(),
[curVal](const int& a, const int& b) { return curVal[a] > curVal[b]; });
// REVIEW alexeyk: the following produces warning (see SCL_SECURE_NO_WARNINGS) so use loop instead.
//std::transform(indices.begin(), indices.begin() + topK, curIdx, [](const int& a) { return static_cast<ElemType>(a); });
for (int i = 0; i < topK; i++)
{
curIdx[i] = static_cast<ElemType>(indices[i]);
curMax[i] = curVal[indices[i]];
}
}
maxValues(0,j) = v;
maxIndexes(0,j) = (ElemType)index;
}
}
else
{
if (topK > 1)
RuntimeError("Row-wise TopK max is not supported.");
maxValues.Resize(m,1);
maxIndexes.Resize(m, 1);
@ -3418,19 +3451,35 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignNumOfDiff(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b)
CPUMatrix<ElemType>& CPUMatrix<ElemType>::AssignNumOfDiff(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, bool searchInCol)
{
if (a.GetNumRows() != b.GetNumRows() || a.GetNumCols() != b.GetNumCols())
InvalidArgument("AssignNumOfDiff: a and b must have same dimension.");
if (a.GetNumCols() != b.GetNumCols())
throw std::invalid_argument("AssignNumOfDiff: a and b must have the same number of columns.");
if (!searchInCol && a.GetNumRows() != b.GetNumRows())
throw std::invalid_argument("AssignNumOfDiff: a and b must have the same number of rows.");
ElemType n = 0;
foreach_coord(i,j,a)
if (!searchInCol)
{
n += (a(i,j) != b(i,j));
foreach_coord(i, j, a)
{
n += (a(i, j) != b(i, j));
}
}
else
{
size_t crow = b.GetNumRows();
const ElemType* curCol = b.m_pArray;
for (size_t icol = 0; icol < a.GetNumCols(); icol++, curCol += crow)
{
auto res = std::find(curCol, curCol + crow, a(0, icol));
if (res == curCol + crow)
n++;
}
}
Resize(1,1); //result should be one element
(*this)(0,0) = n;
Resize(1, 1); //result should be one element
(*this)(0, 0) = n;
return *this;
}

Просмотреть файл

@ -242,7 +242,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CPUMatrix<ElemType>& AssignRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AddToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AddWithRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
CPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const CPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
//CPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const CPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
CPUMatrix<ElemType>& AssignToRowSliceValuesOf(const CPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
@ -252,10 +252,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CPUMatrix<ElemType>& AssignPositiveAndShiftedNegSample(const CPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);
CPUMatrix<ElemType>& AddFoldedPositiveAndShiftedNegSample(const CPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);
void VectorMax(CPUMatrix<ElemType>& maxIndexes, CPUMatrix<ElemType>& maxValues, const bool isColWise) const;
void VectorMin(CPUMatrix<ElemType>& mainndexes, CPUMatrix<ElemType>& minValues, const bool isColWise) const;
void VectorMax(CPUMatrix<ElemType>& maxIndexes, CPUMatrix<ElemType>& maxValues, const bool isColWise, int topK = 1) const;
void VectorMin(CPUMatrix<ElemType>& minIndexes, CPUMatrix<ElemType>& minValues, const bool isColWise) const;
CPUMatrix<ElemType>& AssignNumOfDiff(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b);
CPUMatrix<ElemType>& AssignNumOfDiff(const CPUMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, bool searchInCol = false);
void Print(const char* matrixName, size_t rowStart, size_t rowEnd, size_t colStart, size_t colEnd) const;
void Print(const char* matrixName = nullptr) const; //print whole matrix. can be expensive

Просмотреть файл

@ -485,6 +485,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUMatrix<ElemType>::~GPUMatrix(void)
{
Clear();
if (m_workspace != nullptr)
delete m_workspace;
}
template<class ElemType>
@ -711,6 +713,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return diag;
}
#if 0
//stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object.
template<class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
@ -767,6 +770,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this;
}
#endif
/// c = c - 1.0 for a specific position
template<class ElemType>
@ -1159,12 +1163,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void GPUMatrix<ElemType>::SetUniformRandomValue(const ElemType low, const ElemType high, unsigned long seed)
{
PrepareDevice();
#if 0 // to change the seed, we must tear down the random generator
// This is not efficient, but for correctness, we must do it.
if (s_curandGenerator && (seed != USE_TIME_BASED_SEED))
{
fprintf(stderr, "SetUniformRandomValue (GPU): destroying curand object\n");
CURAND_CALL(curandDestroyGenerator(((curandGenerator_t*)s_curandGenerator)[0])); // TODO: what is this typecast business??
delete s_curandGenerator;
s_curandGenerator = NULL;
}
#endif
if (s_curandGenerator==NULL)
{
{
unsigned long long cudaSeed = (seed == USE_TIME_BASED_SEED) ? time(NULL) : seed;
fprintf(stderr, "SetUniformRandomValue (GPU): creating curand object with seed %llu\n", cudaSeed);
s_curandGenerator = new curandGenerator_t;
/* Create pseudo-random number generator */
CURAND_CALL(curandCreateGenerator(&(((curandGenerator_t*)s_curandGenerator)[0]),CURAND_RNG_PSEUDO_XORWOW));
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(((curandGenerator_t*)s_curandGenerator)[0], seed==USE_TIME_BASED_SEED ? time(NULL) : seed));
/* Create pseudo-random number generator */
CURAND_CALL(curandCreateGenerator(&(((curandGenerator_t*)s_curandGenerator)[0]),CURAND_RNG_PSEUDO_XORWOW));
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(((curandGenerator_t*)s_curandGenerator)[0], cudaSeed));
CURAND_CALL(curandSetGeneratorOrdering(((curandGenerator_t*)s_curandGenerator)[0],CURAND_ORDERING_PSEUDO_SEEDED));
}
@ -1197,12 +1213,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void GPUMatrix<ElemType>::SetGaussianRandomValue(const ElemType mean, const ElemType sigma, unsigned long seed)
{
PrepareDevice();
#if 0 // to change the seed, we must tear down the random generator
// This is not efficient, but for correctness, we must do it.
if (s_curandGenerator && (seed != USE_TIME_BASED_SEED))
{
fprintf(stderr, "SetGaussianRandomValue (GPU): destroying curand object\n");
CURAND_CALL(curandDestroyGenerator(((curandGenerator_t*)s_curandGenerator)[0])); // TODO: what is this typecast business??
delete s_curandGenerator;
s_curandGenerator = NULL;
}
#endif
if (s_curandGenerator==NULL)
{
{
unsigned long long cudaSeed = (seed == USE_TIME_BASED_SEED) ? time(NULL) : seed;
fprintf(stderr, "SetGaussianRandomValue (GPU): creating curand object with seed %llu\n", cudaSeed);
s_curandGenerator = new curandGenerator_t;
/* Create pseudo-random number generator */
CURAND_CALL(curandCreateGenerator(&(((curandGenerator_t*)s_curandGenerator)[0]),CURAND_RNG_PSEUDO_XORWOW));
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(((curandGenerator_t*)s_curandGenerator)[0], seed==USE_TIME_BASED_SEED ? time(NULL) : seed));
CURAND_CALL(curandCreateGenerator(&(((curandGenerator_t*)s_curandGenerator)[0]),CURAND_RNG_PSEUDO_XORWOW));
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(((curandGenerator_t*)s_curandGenerator)[0], cudaSeed));
CURAND_CALL(curandSetGeneratorOrdering(((curandGenerator_t*)s_curandGenerator)[0],CURAND_ORDERING_PSEUDO_SEEDED));
}
@ -2906,32 +2934,132 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (IsEmpty())
LogicError("VectorMax: Matrix is empty.");
const GPUMatrix<ElemType>& us=*this;
const GPUMatrix<ElemType>& us = *this;
const CUDA_LONG m = (CUDA_LONG)GetNumRows();
const CUDA_LONG n = (CUDA_LONG)GetNumCols();
assert (m>0 && n>0); //converting from size_t to int may cause overflow
assert(m > 0 && n > 0); //converting from size_t to int may cause overflow
PrepareDevice();
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
if (isColWise)
{
maxValues.Resize(1, n);
maxIndexes.Resize(1, n);
int blocksPerGrid = n; //we'll have 1 block processing 1 column
_vectorMaxMinReduce<ElemType, true><<<blocksPerGrid,threadsPerBlock,0,t_stream>>>(us.m_pArray,maxIndexes.m_pArray,maxValues.m_pArray,m,n);
_vectorMaxMinReduce<ElemType, true><<<blocksPerGrid, threadsPerBlock, 0, t_stream>>>(us.m_pArray, maxIndexes.m_pArray, maxValues.m_pArray, m, n);
/*int blocksPerGrid=(int)ceil(1.0*n/threadsPerBlock);
/*int blocksPerGrid=(int)ceil(1.0*n/threadsPerBlock);
_vectorMax<ElemType><<<blocksPerGrid,threadsPerBlock,0,t_stream>>>(us.m_pArray,maxIndexes.m_pArray,maxValues.m_pArray,m,n,isColWise);*/
}
else
{
maxValues.Resize(m, 1);
maxIndexes.Resize(m, 1);
int blocksPerGrid=(int)ceil(1.0*m/threadsPerBlock);
_vectorMax<ElemType><<<blocksPerGrid,threadsPerBlock,0,t_stream>>>(us.m_pArray,maxIndexes.m_pArray,maxValues.m_pArray,m,n,isColWise);
int blocksPerGrid = (int)ceil(1.0*m / threadsPerBlock);
_vectorMax<ElemType><<<blocksPerGrid, threadsPerBlock, 0, t_stream>>>(us.m_pArray, maxIndexes.m_pArray, maxValues.m_pArray, m, n, isColWise);
}
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
}
__global__ void _initIndicesForSort(uint64_t* indexes, CUDA_LONG crow, CUDA_LONG ccol)
{
CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
if (id >= crow * ccol)
return;
uint32_t irow = id % crow;
uint32_t icol = id / crow;
indexes[id] = (static_cast<uint64_t>(irow) << 32) | icol;
}
template<class ElemType>
void GPUMatrix<ElemType>::VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<ElemType>& maxValues, const bool isColWise, int topK) const
{
if (IsEmpty())
LogicError("VectorMax: Matrix is empty.");
if (topK == 1)
{
VectorMax(maxIndexes, maxValues, isColWise);
return;
}
if (!isColWise)
RuntimeError("Row-wise TopK max is not supported.");
const GPUMatrix<ElemType>& us = *this;
const CUDA_LONG m = (CUDA_LONG)GetNumRows();
const CUDA_LONG n = (CUDA_LONG)GetNumCols();
assert(topK <= m);
assert(m > 0 && n > 0); //converting from size_t to int may cause overflow
PrepareDevice();
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
maxValues.Resize(topK, n);
maxIndexes.Resize(topK, n);
// To sort matrix columns we use 2-pass _stable_ sort algorithm:
// 1. Sort by values (descending) with corresponding row/col indexes.
// 2. Sort by col indices (ascending) with corresponding values/row indices.
// Indices are stored as 64-bit ints where low 32 bits represent column and high 32 bits - row index.
// On the second pass only first 32 bits of the index are used in sorting, so SortPairs has
// begin_bit and end_bit set accordingly.
CUDA_LONG celt = static_cast<CUDA_LONG>(GetNumElements());
ElemType* inVal = us.m_pArray;
ElemType* outVal1 = nullptr;
ElemType* outVal2 = nullptr;
uint64_t* inIdx = nullptr;
uint64_t* outIdx = nullptr;
// Determine temp buffer size needed for SortPairsDescending to sort values on the first pass.
size_t cbtemp = 0;
// If first param is nullptr then no actual work is done except writing result to cbtemp.
CUDA_CALL(cub::DeviceRadixSort::SortPairsDescending(nullptr, cbtemp, inVal, outVal1, inIdx, outIdx, celt, 0, sizeof(ElemType) * 8, t_stream));
size_t ctemp1 = (cbtemp + sizeof(ElemType) - 1) / sizeof(ElemType);
// Determine temp buffer size needed for SortPairs to sort indices on the second pass.
cbtemp = 0;
CUDA_CALL(cub::DeviceRadixSort::SortPairs(nullptr, cbtemp, outIdx, inIdx, outVal1, outVal2, celt, 0, 32, t_stream));
size_t ctemp2 = (cbtemp + sizeof(ElemType) - 1) / sizeof(ElemType);
size_t ctemp = std::max(ctemp1, ctemp2);
cbtemp = ctemp * sizeof(ElemType);
// ElemType count needed to store indices, accounting for natural alignment for uint64_t type.
size_t cidx = ((celt + 1) * sizeof(uint64_t) - 1 + sizeof(ElemType) - 1) / sizeof(ElemType);
// Prepare temp workspace.
auto deviceId = m_computeDevice;
assert(m_workspace != nullptr);
auto workspace = m_workspace->pop_or_create([deviceId]() { return std::make_unique<GPUMatrix<ElemType>>(deviceId); });
// Resize to store: output values for the 1st and 2nd passes, input indices, output indices, and temp storage.
workspace->Resize(m, 2 * n + (2 * cidx + ctemp + m - 1) / m);
outVal1 = workspace->m_pArray;
outVal2 = outVal1 + celt;
inIdx = reinterpret_cast<uint64_t*>(outVal2 + celt);
// Align indices pointer if needed.
size_t cbAlign = reinterpret_cast<size_t>(inIdx) % sizeof(uint64_t);
if (cbAlign != 0)
reinterpret_cast<uint8_t*&>(inIdx) += sizeof(uint64_t) - cbAlign;
outIdx = inIdx + celt;
void* ptmp = outIdx + celt;
assert(reinterpret_cast<ElemType*>(reinterpret_cast<uint8_t*>(ptmp) + cbtemp) <= workspace->m_pArray + workspace->GetNumElements());
// Initialize indices.
const int ThreadsPerBlock = 128;
int cblock = (celt + ThreadsPerBlock - 1) / ThreadsPerBlock;
_initIndicesForSort<<<cblock, ThreadsPerBlock, 0, t_stream>>>(inIdx, m, n);
// Sort by values.
CUDA_CALL(cub::DeviceRadixSort::SortPairsDescending(ptmp, cbtemp, inVal, outVal1, inIdx, outIdx, celt, 0, sizeof(ElemType) * 8, t_stream));
// Sort by column indices. outIdx contains indices after the first pass so it's used as an input.
CUDA_CALL(cub::DeviceRadixSort::SortPairs(ptmp, cbtemp, outIdx, inIdx, outVal1, outVal2, celt, 0, 32, t_stream));
// Copy results.
cblock = (topK * n + ThreadsPerBlock - 1) / ThreadsPerBlock;
_copyTopKResults<<<cblock, ThreadsPerBlock, 0, t_stream>>>(inIdx, outVal2, maxIndexes.m_pArray, maxValues.m_pArray, m, n, topK);
m_workspace->push(std::move(workspace));
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
}
@ -2975,21 +3103,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignNumOfDiff(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b)
GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignNumOfDiff(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, bool searchInCol)
{
if (a.GetNumRows() != b.GetNumRows() || a.GetNumCols() != b.GetNumCols())
InvalidArgument("AssignNumOfDiff: a and b must have same dimension.");
if (a.GetNumCols() != b.GetNumCols())
InvalidArgument("AssignNumOfDiff: a and b must have the same number of columns.");
if (!searchInCol && a.GetNumRows() != b.GetNumRows())
InvalidArgument("AssignNumOfDiff: a and b must have the same number of rows.");
Resize(1,1); //result should be one element
Resize(1, 1); //result should be one element
PrepareDevice();
cudaEvent_t done = nullptr;
//int blocksPerGrid=(int)ceil(1.0*a.GetNumElements()/threadsPerBlock);
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
//_assignNumOfDiff<ElemType><<<blocksPerGrid,threadsPerBlock,0,t_stream>>>(a.m_pArray, b.m_pArray, m_pArray, a.GetNumElements());
_assignNumOfDiff<ElemType><<<1,1024,0,t_stream>>>(a.m_pArray, b.m_pArray, m_pArray, (CUDA_LONG)a.GetNumElements());
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
if (!searchInCol)
{
//int blocksPerGrid=(int)ceil(1.0*a.GetNumElements()/threadsPerBlock);
//_assignNumOfDiff<ElemType><<<blocksPerGrid,threadsPerBlock,0,t_stream>>>(a.m_pArray, b.m_pArray, m_pArray, a.GetNumElements());
_assignNumOfDiff<ElemType><<<1, 1024, 0, t_stream>>>(a.m_pArray, b.m_pArray, m_pArray, (CUDA_LONG)a.GetNumElements());
}
else
{
const int blockSize = 1024;
_assignNumOfDiffCol<blockSize><<<1, blockSize, 0, t_stream>>>(a.m_pArray, b.m_pArray, m_pArray,
static_cast<CUDA_LONG>(b.GetNumRows()), static_cast<CUDA_LONG>(a.GetNumCols()));
}
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
return *this;
}

Просмотреть файл

@ -88,6 +88,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
static cublasHandle_t s_cuHandle[MaxGpus];
static void *s_curandGenerator;
// Have to use naked pointer to avoid issues with __declspec(dllexport) on Windows.
// REVIEW alexeyk: can be allocated lazily but the current footprint is small anyway.
mutable conc_stack<std::unique_ptr<GPUMatrix<ElemType>>>* m_workspace = new conc_stack<std::unique_ptr<GPUMatrix<ElemType>>>;
private:
void performInplaceFunction(int kind);
size_t LocateElement (const size_t i, const size_t j) const;
@ -286,7 +290,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUMatrix<ElemType>& AssignRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
GPUMatrix<ElemType>& AddToRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
GPUMatrix<ElemType>& AddWithRowSliceValuesOf(const GPUMatrix<ElemType>& a, const size_t startIndex, const size_t numRows);
GPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
//GPUMatrix<ElemType>& AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
GPUMatrix<ElemType>& AssignRepeatOf(const GPUMatrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
GPUMatrix<ElemType>& AddToRowRepeatValuesOf(const GPUMatrix<ElemType>& a, const size_t numRowRepeats);
@ -295,9 +299,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUMatrix<ElemType>& AddFoldedPositiveAndShiftedNegSample(const GPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);
void VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<ElemType>& maxValues, const bool isColWise) const;
void VectorMin(GPUMatrix<ElemType>& mainndexes, GPUMatrix<ElemType>& minValues, const bool isColWise) const;
void VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<ElemType>& maxValues, const bool isColWise, int topK) const;
void VectorMin(GPUMatrix<ElemType>& minIndexes, GPUMatrix<ElemType>& minValues, const bool isColWise) const;
GPUMatrix<ElemType>& AssignNumOfDiff(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b);
GPUMatrix<ElemType>& AssignNumOfDiff(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, bool searchInCol = false);
GPUMatrix<ElemType>& AssignInnerProductOfMatrices(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b);

Просмотреть файл

@ -12,6 +12,20 @@
#include <cuda_runtime.h>
#include "CommonMatrix.h"
#include "device_functions.h"
#include <assert.h>
// REVIEW alexeyk: disable warnings properly for GCC/clang
#ifdef _MSC_VER
#pragma warning (push)
#pragma warning (disable: 4100)
#pragma warning (disable: 4127)
#pragma warning (disable: 4201)
#pragma warning (disable: 4515)
#endif
#include <cub/cub.cuh>
#ifdef _MSC_VER
#pragma warning (pop)
#endif
// We would like to use 64-bit integer to support large matrices. However, CUDA seems to support only 32-bit integer
// For now, use int32_t to ensure that both Linux and Windows see this as 32 bit integer type.
@ -4502,4 +4516,47 @@ __global__ void _AssignSequenceError(const ElemType hsmoothingWeight, ElemType *
//error[id] -= alpha * (label[id] - dnnoutput[id] );
}
// Scatters top-K sort results into the output matrices of VectorMax(topK).
// 'indexes' holds packed 64-bit keys whose high 32 bits are the original row
// index of each element (low 32 bits are ignored here); 'values' holds the
// corresponding element values. One thread fills one output cell.
// NOTE(review): assumes 'indexes'/'values' are column-major with 'crow' rows
// per column and that the first 'topK' entries of each column are the winners
// — confirm against the producing sort kernel.
template<class ElemType>
__global__ void _copyTopKResults(const uint64_t* indexes, const ElemType* values, ElemType* maxIndexes, ElemType* maxValues,
    CUDA_LONG crow, CUDA_LONG ccol, int topK)
{
    // Global linear thread id; launch must cover topK * ccol threads.
    CUDA_LONG id = blockDim.x * blockIdx.x + threadIdx.x;
    if (id >= topK * ccol)
        return;
    // Decompose linear id into (rank within top-K, column).
    CUDA_LONG irow = id % topK;
    CUDA_LONG icol = id / topK;
    // High 32 bits of the packed key carry the original row index.
    maxIndexes[id] = static_cast<CUDA_LONG>(indexes[icol * crow + irow] >> 32);
    maxValues[id] = values[icol * crow + irow];
}
// Counts, across all columns, how many entries of row vector 'a' do NOT occur
// anywhere in the corresponding column of 'b', and writes that count to *c.
// Used as the error-count step of AssignNumOfDiff with searchInCol=true
// (a[icol] is the label; column icol of 'b' holds crowB candidate values).
// Must be launched as a single block (asserted below) whose size equals the
// BlockSize template parameter, since cub::BlockReduce is specialized on it.
template<int BlockSize, class ElemType>
__global__ void _assignNumOfDiffCol(const ElemType *a, const ElemType *b, ElemType *c, CUDA_LONG crowB, CUDA_LONG ccol)
{
    // Single-block kernel: the final block-wide reduction is the whole answer.
    assert(gridDim.x == 1 && gridDim.y == 1 && gridDim.z == 1);

    int cur = 0; // per-thread count of "not found" columns
    CUDA_LONG icol = threadIdx.x;
    // Threads stride over columns blockDim.x apart.
    for (; icol < ccol; icol += blockDim.x)
    {
        ElemType key = a[icol];
        CUDA_LONG idxB = icol * crowB; // start of column icol in column-major 'b'
        CUDA_LONG irow = 0;
        // Linear scan of the column; stop at the first match.
        for (; irow < crowB; irow++, idxB++)
        {
            if (b[idxB] == key)
                break;
        }
        // irow == crowB means the scan finished without a match -> count a diff.
        cur += (irow == crowB);
    }
    // Sum the per-thread counts across the block; thread 0 writes the result.
    using BlockReduceT = cub::BlockReduce<int, BlockSize>;
    __shared__ typename BlockReduceT::TempStorage tmp;
    int res = BlockReduceT(tmp).Sum(cur);
    if (threadIdx.x == 0)
        *c = res;
}
#endif // !CPUONLY

Просмотреть файл

@ -1737,6 +1737,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this;
}
#if 0 // no longer needed, and overkill anyway as it can be implemented as a bunch of calls to AssignRowSliceValuesOf()
//stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object.
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRowStackValuesOf(const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
@ -1797,7 +1798,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return *this;
}
#endif
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats)
@ -2456,7 +2457,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
Matrix<ElemType>& Matrix<ElemType>::AssignNumOfDiff (const Matrix<ElemType>& a, const Matrix<ElemType>& b)
Matrix<ElemType>& Matrix<ElemType>::AssignNumOfDiff (const Matrix<ElemType>& a, const Matrix<ElemType>& b, bool searchInCol)
{
DecideAndMoveToRightDevice(a, b, *this);
//WARNING: a and b must have same type
@ -2467,8 +2468,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
DISPATCH_MATRIX_ON_FLAG(this,
this,
this->m_CPUMatrix->AssignNumOfDiff(*a.m_CPUMatrix, *b.m_CPUMatrix),
this->m_GPUMatrix->AssignNumOfDiff(*a.m_GPUMatrix, *b.m_GPUMatrix),
this->m_CPUMatrix->AssignNumOfDiff(*a.m_CPUMatrix, *b.m_CPUMatrix, searchInCol),
this->m_GPUMatrix->AssignNumOfDiff(*a.m_GPUMatrix, *b.m_GPUMatrix, searchInCol),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED
);
@ -3390,13 +3391,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
DISPATCH_MATRIX_ON_FLAG(this,
&maxValues,
this->m_CPUMatrix->VectorMax(*maxIndexes.m_CPUMatrix,*maxValues.m_CPUMatrix,isColWise); maxIndexes.SetDataLocation(CPU, DENSE),
this->m_GPUMatrix->VectorMax(*maxIndexes.m_GPUMatrix,*maxValues.m_GPUMatrix,isColWise); maxIndexes.SetDataLocation(GPU, DENSE),
NOT_IMPLEMENTED,
this->m_CPUMatrix->VectorMax(*maxIndexes.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise); maxIndexes.SetDataLocation(CPU, DENSE),
this->m_GPUMatrix->VectorMax(*maxIndexes.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise); maxIndexes.SetDataLocation(GPU, DENSE),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED
);
}
}
// Top-K variant of VectorMax: for each vector (column if isColWise, else row)
// finds the topK largest values and their indexes, dispatching to the CPU or
// GPU dense implementation. maxIndexes/maxValues are moved to this matrix's
// device and converted to its matrix type/format before dispatch.
// Raises LogicError on an empty matrix; sparse formats are NOT_IMPLEMENTED.
template<class ElemType>
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise, int topK) const
{
    if (IsEmpty())
        LogicError("VectorMax: Matrix is empty.");

    // Ensure all three matrices live on the same (preferred) device.
    DecideAndMoveToRightDevice(*this, maxIndexes, maxValues);
    // Outputs adopt this matrix's type/format; 'false' = no need to keep old values.
    maxIndexes.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
    maxValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);

    // Arms are (CPU dense, GPU dense, CPU sparse, GPU sparse); each arm also
    // marks maxIndexes' data location since the macro only tracks &maxValues.
    DISPATCH_MATRIX_ON_FLAG(this,
        &maxValues,
        this->m_CPUMatrix->VectorMax(*maxIndexes.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise, topK); maxIndexes.SetDataLocation(CPU, DENSE),
        this->m_GPUMatrix->VectorMax(*maxIndexes.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise, topK); maxIndexes.SetDataLocation(GPU, DENSE),
        NOT_IMPLEMENTED,
        NOT_IMPLEMENTED
        );
}
template<class ElemType>
void Matrix<ElemType>::VectorMin(Matrix<ElemType>& minIndexes, Matrix<ElemType>& minValues, const bool isColWise) const

Просмотреть файл

@ -75,7 +75,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
mutable size_t m_numTimesDeviceChanged;
mutable size_t m_numTimesMatrixTypeChanged;
mutable int m_devicesTransferedTo[2];
mutable int m_devicesTransferedTo[2]; // TODO: what is this for? Seems only diagnostics
//Moves matrix from device id_from to device with id_to. This method doesn't change preferred device Id
void _transferFromDeviceToDevice(int id_from, int id_to, bool ismoved=true,bool emptyTransfer=false) const;
@ -332,7 +332,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>& AssignRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AddToRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AddWithRowSliceValuesOf(const Matrix<ElemType>& a, const size_t startIndex, const size_t numRows);
Matrix<ElemType>& AssignRowStackValuesOf(const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
//Matrix<ElemType>& AssignRowStackValuesOf(const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols);
Matrix<ElemType>& AssignRepeatOf(const Matrix<ElemType>& a, const size_t numRowRepeats, const size_t numColRepeats);
Matrix<ElemType>& AddToRowRepeatValuesOf(const Matrix<ElemType>& a, const size_t numRepeats);
@ -368,9 +368,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>& AssignSignOf(const Matrix<ElemType>& a);
Matrix<ElemType>& AddSignOf(const Matrix<ElemType>& a);
void VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise) const;
void VectorMin(Matrix<ElemType>& mainndexes, Matrix<ElemType>& minValues, const bool isColWise) const;
void VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise, int topK) const;
void VectorMin(Matrix<ElemType>& minIndexes, Matrix<ElemType>& minValues, const bool isColWise) const;
Matrix<ElemType>& AssignNumOfDiff(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
Matrix<ElemType>& AssignNumOfDiff(const Matrix<ElemType>& a, const Matrix<ElemType>& b, bool searchInCol = false);
Matrix<ElemType>& AssignInnerProductOfMatrices(const Matrix<ElemType>& a, const Matrix<ElemType>& b); //this method will resize(1,1) first

Просмотреть файл

@ -505,7 +505,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//for each column of a, we add all rows of a to this starting from startIndex
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddToRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddWithRowSliceValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t startIndex, const size_t numRows) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols) { return *this; }
//template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRowStackValuesOf(const std::vector<const GPUMatrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRepeatOf(const GPUMatrix<ElemType>& /*a*/, const size_t numRowRepeats, const size_t numColRepeats) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AddToRowRepeatValuesOf(const GPUMatrix<ElemType>& /*a*/, const size_t numRowRepeats) { return *this; }
@ -863,10 +863,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType> void GPUMatrix<ElemType>::VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<ElemType>& maxValues, const bool isColWise) const
{}
template<class ElemType> void GPUMatrix<ElemType>::VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<ElemType>& maxValues, const bool isColWise, int topK, GPUMatrix<ElemType>& workspace) const
{}
template<class ElemType> void GPUMatrix<ElemType>::VectorMin(GPUMatrix<ElemType>& minIndexes, GPUMatrix<ElemType>& minValues, const bool isColWise) const
{}
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignNumOfDiff(const GPUMatrix<ElemType>& /*a*/, const GPUMatrix<ElemType>& /*b*/) { return *this; }
template<class ElemType> GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignNumOfDiff(const GPUMatrix<ElemType>& /*a*/, const GPUMatrix<ElemType>& /*b*/, bool /*searchInCol = false*/) { return *this; }
#pragma endregion Member BLAS Functions

Просмотреть файл

@ -1,41 +1,18 @@
#!/bin/bash
if [ "$TEST_DEVICE" == "cpu" ]; then
CNTK_DEVICE_ID=-1
elif [ "$TEST_DEVICE" == "gpu" ]; then
CNTK_DEVICE_ID=0
. $TEST_ROOT_DIR/run-test-common
ConfigDir=$TEST_DIR/../..
LogFileName=stderr
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
if cntkmpirun "-n 4" SimpleMultiGPU.config "precision=double SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]]" ; then
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank1
sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank2
sed 's/^/MPI Rank 3: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank3
exit $ExitCode
else
echo "Error: Unknown TEST_DEVICE specified!"
exit 3
exit $?
fi
configFile=$TEST_DIR/../../SimpleMultiGPU.config
RunDir=$TEST_RUN_DIR
DataDir=$TEST_DATA_DIR
if [ "$OS" == "Windows_NT" ]; then
# When running on cygwin translating /cygdrive/xxx paths to proper windows paths:
configFile=$(cygpath -aw $configFile)
RunDir=$(cygpath -aw $RunDir)
DataDir=$(cygpath -aw $DataDir)
fi
# Since we use the MS MPI program on Windows, the CNTK binary path argument
# passed to mpiexec must be in the windows format
CNTKBinaryPath=$TEST_CNTK_BINARY
if [ "$OS" == "Windows_NT" ]; then
CNTKBinaryPath=$(cygpath -aw $CNTKBinaryPath)
fi
MPI_ARGS="-n 4"
CNTK_ARGS="configFile=$configFile RunDir=$RunDir DataDir=$DataDir DeviceId=$CNTK_DEVICE_ID stderr=$RunDir/stderr precision=double SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]]"
MODELS_DIR=$TEST_RUN_DIR/models
[ -d $MODELS_DIR ] && rm -rf $MODELS_DIR
mkdir -p $MODELS_DIR || exit $?
echo === Running "$MPI_BINARY" $MPI_ARGS $CNTKBinaryPath $CNTK_ARGS
"$MPI_BINARY" $MPI_ARGS $CNTKBinaryPath $CNTK_ARGS
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/stderr_SimpleMultiGPU.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/stderr_SimpleMultiGPU.logrank1
sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/stderr_SimpleMultiGPU.logrank2
sed 's/^/MPI Rank 3: /' $TEST_RUN_DIR/stderr_SimpleMultiGPU.logrank3
exit $ExitCode

Просмотреть файл

@ -1,41 +1,18 @@
#!/bin/bash
if [ "$TEST_DEVICE" == "cpu" ]; then
CNTK_DEVICE_ID=-1
elif [ "$TEST_DEVICE" == "gpu" ]; then
CNTK_DEVICE_ID=0
. $TEST_ROOT_DIR/run-test-common
ConfigDir=$TEST_DIR/../..
LogFileName=stderr
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
if cntkmpirun "-n 4" SimpleMultiGPU.config "precision=float SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=32]]]]" ; then
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank1
sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank2
sed 's/^/MPI Rank 3: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank3
exit $ExitCode
else
echo "Error: Unknown TEST_DEVICE specified!"
exit 3
exit $?
fi
configFile=$TEST_DIR/../../SimpleMultiGPU.config
RunDir=$TEST_RUN_DIR
DataDir=$TEST_DATA_DIR
if [ "$OS" == "Windows_NT" ]; then
# When running on cygwin translating /cygdrive/xxx paths to proper windows paths:
configFile=$(cygpath -aw $configFile)
RunDir=$(cygpath -aw $RunDir)
DataDir=$(cygpath -aw $DataDir)
fi
# Since we use the MS MPI program on Windows, the CNTK binary path argument
# passed to mpiexec must be in the windows format
CNTKBinaryPath=$TEST_CNTK_BINARY
if [ "$OS" == "Windows_NT" ]; then
CNTKBinaryPath=$(cygpath -aw $CNTKBinaryPath)
fi
MPI_ARGS="-n 4"
CNTK_ARGS="configFile=$configFile RunDir=$RunDir DataDir=$DataDir DeviceId=$CNTK_DEVICE_ID stderr=$RunDir/stderr precision=float SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=32]]]]"
MODELS_DIR=$TEST_RUN_DIR/models
[ -d $MODELS_DIR ] && rm -rf $MODELS_DIR
mkdir -p $MODELS_DIR || exit $?
echo === Running "$MPI_BINARY" $MPI_ARGS $CNTKBinaryPath $CNTK_ARGS
"$MPI_BINARY" $MPI_ARGS $CNTKBinaryPath $CNTK_ARGS
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/stderr_SimpleMultiGPU.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/stderr_SimpleMultiGPU.logrank1
sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/stderr_SimpleMultiGPU.logrank2
sed 's/^/MPI Rank 3: /' $TEST_RUN_DIR/stderr_SimpleMultiGPU.logrank3
exit $ExitCode

Просмотреть файл

@ -1,27 +1,8 @@
#!/bin/bash
if [ "$TEST_DEVICE" == "cpu" ]; then
CNTK_DEVICE_ID=-1
elif [ "$TEST_DEVICE" == "gpu" ]; then
CNTK_DEVICE_ID=0
else
echo "Error: Unknown TEST_DEVICE specified!"
exit 3
fi
configFile=$TEST_DIR/cntk_dpt.config
RunDir=$TEST_RUN_DIR
DataDir=$TEST_DATA_DIR
#!/bin/bash
if [ "$OS" == "Windows_NT" ]; then
# When running on cygwin translating /cygdrive/xxx paths to proper windows paths:
configFile=$(cygpath -aw $configFile)
RunDir=$(cygpath -aw $RunDir)
DataDir=$(cygpath -aw $DataDir)
fi
. $TEST_ROOT_DIR/run-test-common
CNTK_ARGS="configFile=$configFile RunDir=$RunDir DataDir=$DataDir DeviceId=$CNTK_DEVICE_ID"
MODELS_DIR=$TEST_RUN_DIR/models
[ -d $MODELS_DIR ] && rm -rf $MODELS_DIR
mkdir -p $MODELS_DIR || exit $?
echo === Running $TEST_CNTK_BINARY $CNTK_ARGS
$TEST_CNTK_BINARY $CNTK_ARGS || exit $?
# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun cntk_dpt.config || exit $?

Просмотреть файл

@ -3,7 +3,11 @@ tags:
# running on every BVT job in 'S' (Speech) leg in Debug-GPU and Release-CPU configurations:
- bvt-s (flavor=='debug') ^ (device=='cpu')
# running unconditionally on every Nightly job in 'S' leg
- nightly-s
# TODO: Temporary disabling Release-GPU because of a known bug causing large variance between
# Release and Debug configurations for GPU only for this (Speech/DNN/DiscriminativePreTraining) test.
# This will be re-enabled after the bug has been addressed.
# DO NOT COPY this disablement for other tests!!
- nightly-s (flavor!='release') or (device!='gpu')
testCases:
CNTK Run must be completed:

Просмотреть файл

@ -1,40 +1,17 @@
#!/bin/bash
if [ "$TEST_DEVICE" == "cpu" ]; then
CNTK_DEVICE_ID=-1
elif [ "$TEST_DEVICE" == "gpu" ]; then
CNTK_DEVICE_ID=0
. $TEST_ROOT_DIR/run-test-common
ConfigDir=$TEST_DIR/..
LogFileName=stderr
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
if cntkmpirun "-n 3" cntk.config "precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]]" ; then
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1
sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank2
exit $ExitCode
else
echo "Error: Unknown TEST_DEVICE specified!"
exit 3
exit $?
fi
configFile=$TEST_DIR/../cntk.config
RunDir=$TEST_RUN_DIR
DataDir=$TEST_DATA_DIR
if [ "$OS" == "Windows_NT" ]; then
# When running on cygwin translating /cygdrive/xxx paths to proper windows paths:
configFile=$(cygpath -aw $configFile)
RunDir=$(cygpath -aw $RunDir)
DataDir=$(cygpath -aw $DataDir)
fi
# Since we use the MS MPI program on Windows, the CNTK binary path argument
# passed to mpiexec must be in the windows format
CNTKBinaryPath=$TEST_CNTK_BINARY
if [ "$OS" == "Windows_NT" ]; then
CNTKBinaryPath=$(cygpath -aw $CNTKBinaryPath)
fi
MPI_ARGS="-n 3"
CNTK_ARGS="configFile=$configFile RunDir=$RunDir DataDir=$DataDir DeviceId=$CNTK_DEVICE_ID stderr=$RunDir/stderr precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]]"
MODELS_DIR=$TEST_RUN_DIR/models
[ -d $MODELS_DIR ] && rm -rf $MODELS_DIR
mkdir -p $MODELS_DIR || exit $?
echo === Running "$MPI_BINARY" $MPI_ARGS $CNTKBinaryPath $CNTK_ARGS
"$MPI_BINARY" $MPI_ARGS $CNTKBinaryPath $CNTK_ARGS
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/stderr_speechTrain.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/stderr_speechTrain.logrank1
sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/stderr_speechTrain.logrank2
exit $ExitCode

Просмотреть файл

@ -1,40 +1,17 @@
#!/bin/bash
if [ "$TEST_DEVICE" == "cpu" ]; then
CNTK_DEVICE_ID=-1
elif [ "$TEST_DEVICE" == "gpu" ]; then
CNTK_DEVICE_ID=0
. $TEST_ROOT_DIR/run-test-common
ConfigDir=$TEST_DIR/..
LogFileName=stderr
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
if cntkmpirun "-n 3" cntk.config ; then
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1
sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank2
exit $ExitCode
else
echo "Error: Unknown TEST_DEVICE specified!"
exit 3
exit $?
fi
configFile=$TEST_DIR/../cntk.config
RunDir=$TEST_RUN_DIR
DataDir=$TEST_DATA_DIR
if [ "$OS" == "Windows_NT" ]; then
# When running on cygwin translating /cygdrive/xxx paths to proper windows paths:
configFile=$(cygpath -aw $configFile)
RunDir=$(cygpath -aw $RunDir)
DataDir=$(cygpath -aw $DataDir)
fi
# Since we use the MS MPI program on Windows, the CNTK binary path argument
# passed to mpiexec must be in the windows format
CNTKBinaryPath=$TEST_CNTK_BINARY
if [ "$OS" == "Windows_NT" ]; then
CNTKBinaryPath=$(cygpath -aw $CNTKBinaryPath)
fi
MPI_ARGS="-n 3"
CNTK_ARGS="configFile=$configFile RunDir=$RunDir DataDir=$DataDir DeviceId=$CNTK_DEVICE_ID stderr=$RunDir/stderr"
MODELS_DIR=$TEST_RUN_DIR/models
[ -d $MODELS_DIR ] && rm -rf $MODELS_DIR
mkdir -p $MODELS_DIR || exit $?
echo === Running "$MPI_BINARY" $MPI_ARGS $CNTKBinaryPath $CNTK_ARGS
"$MPI_BINARY" $MPI_ARGS $CNTKBinaryPath $CNTK_ARGS
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/stderr_speechTrain.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/stderr_speechTrain.logrank1
sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/stderr_speechTrain.logrank2
exit $ExitCode

Просмотреть файл

@ -19,9 +19,9 @@ DNN=[
features=Input(featDim, tag=feature)
labels=Input(LabelDim, tag=label)
GlobalMean=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalMean$, computeGradient=false)
GlobalMean=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalMean$, computeGradient=false)
GlobalInvStd=Parameter(featDim, init=fromFile, initFromFilePath=$GlobalInvStd$, computeGradient=false)
GlobalPrior=Parameter(LabelDim, init=fromFile, initFromFilePath=$GlobalPrior$, computeGradient=false)
GlobalPrior=Parameter(LabelDim, init=fromFile, initFromFilePath=$GlobalPrior$, computeGradient=false)
logPrior=Log(GlobalPrior)
# define network

Просмотреть файл

@ -1,7 +1,9 @@
DNNLayer(inDim, outDim, x)
{
W = Parameter(outDim, inDim);
b = Parameter(outDim);
#W = Parameter(outDim, inDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1); # randomizing on CPU with fixed seed to get reproducable results across configurations
#b = Parameter(outDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
W = Parameter(outDim, inDim);
b = Parameter(outDim);
t = Times(W, x);
z = Plus(t, b);
y = sigmoid(z);
@ -9,6 +11,8 @@ DNNLayer(inDim, outDim, x)
DNNLastLayer(hiddenDim, LabelDim, x)
{
#W = Parameter(LabelDim, hiddenDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
#b = Parameter(LabelDim, init=uniform, initValueScale=1, initOnCPUOnly=true, randomSeed=1);
W = Parameter(LabelDim, hiddenDim);
b = Parameter(LabelDim);
t = Times(W, x);

Просмотреть файл

@ -1,29 +1,8 @@
#!/bin/bash
if [ "$TEST_DEVICE" == "cpu" ]; then
CNTK_DEVICE_ID=-1
elif [ "$TEST_DEVICE" == "gpu" ]; then
CNTK_DEVICE_ID=0
else
echo "Error: Unknown TEST_DEVICE specified!"
exit 3
fi
configFile=$TEST_DIR/../cntk.config
RunDir=$TEST_RUN_DIR
DataDir=$TEST_DATA_DIR
NDLDir=$TEST_DIR/..
. $TEST_ROOT_DIR/run-test-common
if [ "$OS" == "Windows_NT" ]; then
# When running on cygwin translating /cygdrive/xxx paths to proper windows paths:
configFile=$(cygpath -aw $configFile)
RunDir=$(cygpath -aw $RunDir)
DataDir=$(cygpath -aw $DataDir)
NDLDir=$(cygpath -aw $NDLDir)
fi
ConfigDir=$TEST_DIR/..
CNTK_ARGS="configFile=$configFile RunDir=$RunDir DataDir=$DataDir DeviceId=$CNTK_DEVICE_ID NDLDir=$NDLDir Truncated=false speechTrain=[reader=[nbruttsineachrecurrentiter=1]] speechTrain=[SGD=[epochSize=2560]] speechTrain=[SGD=[maxEpochs=2]] speechTrain=[SGD=[numMBsToShowResult=1]]"
MODELS_DIR=$TEST_RUN_DIR/models
[ -d $MODELS_DIR ] && rm -rf $MODELS_DIR
mkdir -p $MODELS_DIR || exit $?
echo === Running $TEST_CNTK_BINARY $CNTK_ARGS
$TEST_CNTK_BINARY $CNTK_ARGS || exit $?
# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun cntk.config 'Truncated=false speechTrain=[reader=[nbruttsineachrecurrentiter=1]] speechTrain=[SGD=[epochSize=2560]] speechTrain=[SGD=[maxEpochs=2]] speechTrain=[SGD=[numMBsToShowResult=1]]' || exit $?

Просмотреть файл

@ -1,29 +1,8 @@
#!/bin/bash
if [ "$TEST_DEVICE" == "cpu" ]; then
CNTK_DEVICE_ID=-1
elif [ "$TEST_DEVICE" == "gpu" ]; then
CNTK_DEVICE_ID=0
else
echo "Error: Unknown TEST_DEVICE specified!"
exit 3
fi
configFile=$TEST_DIR/../cntk.config
RunDir=$TEST_RUN_DIR
DataDir=$TEST_DATA_DIR
NDLDir=$TEST_DIR/..
. $TEST_ROOT_DIR/run-test-common
if [ "$OS" == "Windows_NT" ]; then
# When running on cygwin translating /cygdrive/xxx paths to proper windows paths:
configFile=$(cygpath -aw $configFile)
RunDir=$(cygpath -aw $RunDir)
DataDir=$(cygpath -aw $DataDir)
NDLDir=$(cygpath -aw $NDLDir)
fi
ConfigDir=$TEST_DIR/..
CNTK_ARGS="configFile=$configFile RunDir=$RunDir DataDir=$DataDir DeviceId=$CNTK_DEVICE_ID NDLDir=$NDLDir"
MODELS_DIR=$TEST_RUN_DIR/models
[ -d $MODELS_DIR ] && rm -rf $MODELS_DIR
mkdir -p $MODELS_DIR || exit $?
echo === Running $TEST_CNTK_BINARY $CNTK_ARGS
$TEST_CNTK_BINARY $CNTK_ARGS || exit $?
# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun cntk.config || exit $?

Просмотреть файл

@ -14,7 +14,7 @@ speechTrain=[
traceLevel=1
#NDLNetworkBuilder=[
# networkDescription=$NDLDir$/lstmp-3layer_WithSelfStab.ndl
# networkDescription=$ConfigDir$/lstmp-3layer_WithSelfStab.ndl
#]
SGD=[

Просмотреть файл

@ -1,31 +1,12 @@
#!/bin/bash
if [ "$TEST_DEVICE" == "cpu" ]; then
CNTK_DEVICE_ID=-1
elif [ "$TEST_DEVICE" == "gpu" ]; then
CNTK_DEVICE_ID=0
else
echo "Error: Unknown TEST_DEVICE specified!"
exit 3
fi
configFile=$TEST_DIR/cntk.config
RunDir=$TEST_RUN_DIR
DataDir=$TEST_DATA_DIR
. $TEST_ROOT_DIR/run-test-common
if [ "$OS" == "Windows_NT" ]; then
# When running on cygwin translating /cygdrive/xxx paths to proper windows paths:
configFile=$(cygpath -aw $configFile)
RunDir=$(cygpath -aw $RunDir)
DataDir=$(cygpath -aw $DataDir)
fi
CNTK_ARGS="configFile=$configFile RunDir=$RunDir DataDir=$DataDir DeviceId=$CNTK_DEVICE_ID"
MODELS_DIR=$TEST_RUN_DIR/models
[ -d $MODELS_DIR ] && rm -rf $MODELS_DIR
mkdir -p $MODELS_DIR || exit $?
echo === Running $TEST_CNTK_BINARY $CNTK_ARGS
$TEST_CNTK_BINARY $CNTK_ARGS || exit $?
# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun cntk.config || exit $?
echo === Deleting last epoch data
rm $TEST_RUN_DIR/models/*.dnn
echo ==== Re-running from checkpoint
$TEST_CNTK_BINARY $CNTK_ARGS || exit $?
DeleteExistingModels=0
# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun cntk.config || exit $?

Просмотреть файл

@ -1,38 +1,32 @@
How to run the Tests\Speech test
================================
Full test
---------
Install Cygwin with the python module.
Execute 'Tests/Testdriver.py run' script. This will run the test in various Tests (recursively). Note that the first time you may get an error about the missing YAML python module that you will need to install.
Command lines for debugging
---------------------------
Note: Below, the current dir is set to be the data directory. This allows for local paths in data scripts.
--- QuickE2E:
--- Speech\QuickE2E:
COMMAND: currentDirectory=$(SolutionDir)Tests\Speech\Data configFile=$(SolutionDir)Tests\Speech\QuickE2E\cntk.config stderr=$(SolutionDir)Tests\Speech\RunDir\QuickE2E\models\cntkSpeech.dnn.log RunDir=$(SolutionDir)Tests\Speech\RunDir\QuickE2E DataDir=. DeviceId=Auto
Linux:
bin/cntk configFile=Tests/Speech/QuickE2E/cntk.config RunDir=Tests/Speech/RunDirL/QuickE2E DataDir=Tests/Speech/Data DeviceId=0
# TODO: can stderr refer to RunDir?
--- LSTM\Truncated:
--- Speech\LSTM\Truncated:
COMMAND: currentDirectory=$(SolutionDir)Tests\Speech\Data configFile=$(SolutionDir)Tests\Speech\LSTM\cntk.config stderr=$(SolutionDir)Tests\Speech\RunDir\LSTM\Truncated\models\cntkSpeech.dnn.log RunDir=$(SolutionDir)Tests\Speech\RunDir\LSTM\Truncated NdlDir=$(SolutionDir)Tests\Speech\LSTM DataDir=. DeviceId=Auto
--- LSTM\FullUtterance:
--- Speech\LSTM\FullUtterance:
COMMAND: currentDirectory=$(SolutionDir)Tests\Speech\Data configFile=$(SolutionDir)Tests\Speech\LSTM\cntk.config stderr=$(SolutionDir)Tests\Speech\RunDir\LSTM\FullUtterance\models\cntkSpeech.dnn.log RunDir=$(SolutionDir)Tests\Speech\RunDir\LSTM\FullUtterance NdlDir=$(SolutionDir)Tests\Speech\LSTM DataDir=. DeviceId=Auto Truncated=false speechTrain=[reader=[nbruttsineachrecurrentiter=1]] speechTrain=[SGD=[epochSize=2560]] speechTrain=[SGD=[maxEpochs=2]] speechTrain=[SGD=[numMBsToShowResult=1]]
--- Speech\DiscriminativePreTraining:
COMMAND: currentDirectory=$(SolutionDir)Tests\Speech\Data configFile=..\DNN\DiscriminativePreTraining\cntk_dpt.config stderr=..\RunDir\DNN\DiscriminativePreTraining\models\cntkSpeech.dnn.log RunDir=..\RunDir\DNN\DiscriminativePreTraining DataDir=. DeviceId=Auto
--- MNIST:
WORKING DIR:
COMMAND: currentDirectory=$(SolutionDir)ExampleSetups\Image\MNIST configFile=02_Conv.config configName=02_Conv

Просмотреть файл

@ -262,6 +262,10 @@ class Test:
if args.verbose:
print self.fullName + ": " + line
if args.dry_run:
print line
continue
print >>output, line
allLines.append(line)
output.flush()
@ -271,13 +275,17 @@ class Test:
exitCode = process.wait()
success = True
# checking exit code
if exitCode != 0:
return TestRunResult.fatalError("Exit code must be 0", "==> got exit code {0} when running: {1}".format(exitCode, " ".join(cmdLine)), logFile = logFile)
# saving log file path, so it can be reported later
result.logFile = logFile
# checking exit code
if exitCode != 0:
if args.dry_run:
print "[SKIPPED]"
return result
else:
return TestRunResult.fatalError("Exit code must be 0", "==> got exit code {0} when running: {1}".format(exitCode, " ".join(cmdLine)), logFile = logFile)
# finalizing verification - need to check whether we have any unmatched lines
for testCaseRunResult in result.testCaseRunResults:
testCaseRunResult.testCase.finalize(testCaseRunResult)
@ -546,6 +554,8 @@ def runCommand(args):
devices = args.devices
flavors = args.flavors
os.environ["TEST_ROOT_DIR"] = os.path.dirname(os.path.realpath(sys.argv[0]))
print "CNTK Test Driver is started"
print "Running tests: ", " ".join([y.fullName for y in testsToRun])
print "Build location: ", args.build_location
@ -555,6 +565,8 @@ def runCommand(args):
if (args.update_baseline):
print "*** Running in automatic baseline update mode ***"
print ""
if args.dry_run:
os.environ["DRY_RUN"] = "1"
succeededCount, totalCount = 0, 0
for test in testsToRun:
for flavor in flavors:
@ -569,7 +581,6 @@ def runCommand(args):
sys.stdout.write("Running test {0} ({1} {2}) - ".format(test.fullName, flavor, device));
if args.dry_run:
print "[SKIPPED] (dry-run)"
continue
# in verbose mode, terminate the line, since there will be a lot of output
if args.verbose:
sys.stdout.write("\n");
@ -629,7 +640,7 @@ defaultRunDir=os.path.join(tmpDir, "cntk-test-{0}.{1}".format(time.strftime("%Y%
runSubparser.add_argument("-r", "--run-dir", default=defaultRunDir, help="directory where to store test output, default: a random dir within /tmp")
runSubparser.add_argument("--update-baseline", action='store_true', help="update baseline file(s) instead of matching them")
runSubparser.add_argument("-v", "--verbose", action='store_true', help="verbose output - dump all output of test script")
runSubparser.add_argument("-n", "--dry-run", action='store_true', help="do not run the tests, only print test names and condfigurations to be run")
runSubparser.add_argument("-n", "--dry-run", action='store_true', help="do not run the tests, only print test names and configurations to be run along with full command lines")
runSubparser.set_defaults(func=runCommand)

106
Tests/run-test-common Normal file
Просмотреть файл

@ -0,0 +1,106 @@
#!/bin/bash
# Common definitions shared by the per-test run-test scripts of the E2E tests.
# The test driver is expected to export TEST_CNTK_BINARY, TEST_DEVICE,
# TEST_DIR, TEST_RUN_DIR and TEST_DATA_DIR before this file is sourced.
BinaryPath=$TEST_CNTK_BINARY
# Map the requested device onto a CNTK DeviceId (-1 = CPU, 0 = first GPU).
case "$TEST_DEVICE" in
cpu)
CNTKDeviceId=-1
;;
gpu)
CNTKDeviceId=0
;;
*)
echo "Error: Unknown TEST_DEVICE specified!"
exit 3
;;
esac
# Defaults which individual run-test scripts may override before calling cntkrun.
LogFileName=
ConfigDir=$TEST_DIR
RunDir=$TEST_RUN_DIR
DataDir=$TEST_DATA_DIR
MPIMode=0
MPIArgs=
DeleteExistingModels=1
# run <command> [args...]
# Print and (unless DRY_RUN=1) execute a command, forwarding its exit code.
# In dry-run mode nothing is executed: the command line is echoed instead.
# On Windows/cygwin the dry-run output additionally translates paths to
# native Windows form and rewrites the test root as $(SolutionDir)\Tests so
# the line can be pasted into Visual Studio debugger settings.
# NOTE(review): dry-run deliberately returns 1 (not 0), presumably so callers
# do not mistake a printed-only command for a successful run -- confirm
# against the run-test scripts that consume this helper.
run()
{
cmd=$1
shift
if [ "$DRY_RUN" == "1" ]; then
workingDir=$PWD
if [ "$OS" == "Windows_NT" ]; then
workingDir=$(cygpath -aw $workingDir)
# For non-MPI runs translate the binary path itself as well (for MPI
# runs cntkmpirun has already converted BinaryPath for mpiexec).
if [[ $MPIMode == 0 ]]; then
cmd=$(cygpath -aw $cmd)
# Double every backslash in the Windows test-root path so it can be
# used safely as a sed pattern in the substitutions below.
TEST_ROOT_DIR_ESCAPED=`echo -n $(cygpath -aw $TEST_ROOT_DIR) | sed 's/\\\\/\\\\\\\\/g'`
workingDir=`echo "$workingDir" | sed "s/$TEST_ROOT_DIR_ESCAPED/\\$\\(SolutionDir\\)\\\\\\\\Tests/g"`
fi
fi
echo Working Directory: $workingDir
echo Full command: "$cmd" "$@"
if [ "$OS" == "Windows_NT" ]; then
if [[ $MPIMode == 0 ]]; then
# Print the argument list with the test root rewritten, in a form
# suitable for the VS project's debugging command arguments.
echo VS debugging command args: "$@" | sed "s/$TEST_ROOT_DIR_ESCAPED/\\$\\(SolutionDir\\)\\\\Tests/g"
fi
fi
return 1
else
echo === Running "$cmd" "$@"
"$cmd" "$@"
return $?
fi
}
# Launch the CNTK executable through run().
# cntkrun <CNTK config file name> <additional CNTK args>
# Uses/overwrites the globals ConfigDir, RunDir, DataDir, CNTKArgs, modelsDir;
# honors LogFileName, DeleteExistingModels, MPIMode and MPIArgs.
cntkrun()
{
    configFileName=$1
    additionalCNTKArgs=$2

    # Under cygwin, CNTK itself needs native Windows paths.
    case $OS in
    Windows_NT)
        ConfigDir=$(cygpath -aw $ConfigDir)
        RunDir=$(cygpath -aw $RunDir)
        DataDir=$(cygpath -aw $DataDir)
        ;;
    esac

    CNTKArgs="configFile=$ConfigDir/$configFileName RunDir=$RunDir DataDir=$DataDir DeviceId=$CNTKDeviceId $additionalCNTKArgs"
    # Redirect CNTK's stderr into the run directory when a log name was chosen.
    [ -n "$LogFileName" ] && CNTKArgs="$CNTKArgs stderr=$RunDir/$LogFileName"

    # Start each run from a clean models directory unless the test opted out.
    modelsDir=$TEST_RUN_DIR/models
    if [ "$DeleteExistingModels" = 1 ] && [ -d $modelsDir ]; then
        rm -rf $modelsDir
    fi
    mkdir -p $modelsDir || exit $?

    if [ "$MPIMode" = 0 ]; then
        run "$BinaryPath" $CNTKArgs
    else
        run "$MPI_BINARY" $MPIArgs $BinaryPath $CNTKArgs
    fi
    return $?
}
# Launch a parallel CNTK run under MPI (delegates to cntkrun with MPIMode=1).
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
cntkmpirun()
{
    MPIMode=1
    MPIArgs=$1

    # The MS MPI mpiexec on Windows requires the CNTK binary path argument
    # in native Windows form.
    [ "$OS" = "Windows_NT" ] && BinaryPath=$(cygpath -aw $BinaryPath)

    cntkrun "$2" "$3"
    return $?
}

81
configure поставляемый
Просмотреть файл

@ -28,6 +28,13 @@ have_gdk=no
gdk_path=
gdk_check=include/nvidia/gdk/nvml.h
have_cub=no
cub_path=
cub_check=cub/cub.cuh
have_opencv=no
opencv_path=
opencv_check=include/opencv2/opencv.hpp
mathlib=
# List from best to worst choice
@ -41,6 +48,8 @@ default_mkls=""
default_cudas="cuda-7.5 cuda-7.0 cuda-6.5"
default_kaldis="kaldi-trunk"
default_gdks=". gdk/usr"
default_cubs="cub-1.4.1"
default_opencvs="opencv-3.0.0"
function default_paths ()
{
@ -101,6 +110,15 @@ function find_gdk ()
find_dir "$default_gdks" "$gdk_check"
}
# Auto-detect an NVIDIA CUB installation: searches the default candidate
# directories (default_cubs) for the marker file cub/cub.cuh via find_dir.
function find_cub ()
{
find_dir "$default_cubs" "$cub_check"
}
# Auto-detect an OpenCV installation: searches the default candidate
# directories (default_opencvs) for include/opencv2/opencv.hpp via find_dir.
function find_opencv ()
{
find_dir "$default_opencvs" "$opencv_check"
}
function is_hardlinked ()
{
r=no
@ -142,11 +160,13 @@ function show_help ()
echo " --add directory add directory to library search path"
echo " --cuda[=(yes|no)] use cuda GPU $(show_default $(default_use_cuda))"
echo " --with-cuda[=directory] $(show_default $(find_cuda))"
echo " --with-cub[=directory] $(show_default $(find_cub))"
echo " --with-gdk[=directory] $(show_default $(find_gdk))"
echo " --with-acml[=directory] $(show_default $(find_acml))"
echo " --with-mkl[=directory] $(show_default $(find_mkl))"
echo " --with-buildtype=(debug|release) $(show_default $default_buildtype)"
echo " --with-kaldi[=directory] $(show_default $(find_kaldi))"
echo " --with-opencv[=directory] $(show_default $(find_opencv))"
echo "Libraries search path:"
for head in $(default_paths)
do
@ -215,6 +235,28 @@ do
fi
fi
;;
# --with-cub[=dir]: enable NVIDIA CUB support. $optarg is the optional
# directory parsed from the option by the enclosing argument loop.
--with-cub*)
have_cub=yes
if test x$optarg = x
then
# No directory given: fall back to the default search locations.
cub_path=$(find_cub)
if test x$cub_path = x
then
echo "Cannot find NVIDIA CUB directory."
echo "Please specify a value for --with-cub"
echo "NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.4.1.zip, extract the archive to /usr/local"
exit 1
fi
else
# Explicit directory: verify it contains the cub/cub.cuh marker file.
if test $(check_dir $optarg $cub_check) = yes
then
cub_path=$optarg
else
echo "Invalid CUB directory $optarg"
exit 1
fi
fi
;;
--with-gdk*)
have_gdk=yes
if test x$optarg = x
@ -313,6 +355,28 @@ do
fi
fi
;;
# --with-opencv[=dir]: enable OpenCV support. $optarg is the optional
# directory parsed from the option by the enclosing argument loop.
--with-opencv*)
have_opencv=yes
if test x$optarg = x
then
# No directory given: fall back to the default search locations.
opencv_path=$(find_opencv)
if test x$opencv_path = x
then
echo "Cannot find OpenCV directory."
echo "Please specify a value for --with-opencv"
echo "OpenCV can be downloaded from http://opencv.org/downloads.html, install instructions http://docs.opencv.org/doc/tutorials/introduction/linux_install/linux_install.html#linux-installation"
exit 1
fi
else
# Explicit directory: verify it contains include/opencv2/opencv.hpp.
if test $(check_dir $optarg $opencv_check) = yes
then
opencv_path=$optarg
else
echo "Invalid OpenCV directory $optarg"
exit 1
fi
fi
;;
*)
echo Invalid option $key
show_help
@ -372,6 +436,18 @@ then
fi
fi
# If CUDA is enabled but no --with-cub was given, try to auto-detect CUB.
# A missing CUB disables the GPU build (enable_cuda=no) instead of aborting
# configure, so CPU-only builds still succeed.
if test $enable_cuda = yes && test x$cub_path = x
then
cub_path=$(find_cub)
if test x$cub_path = x ; then
echo Cannot locate NVIDIA CUB directory
echo GPU will be disabled
echo NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.4.1.zip, extract the archive to /usr/local
enable_cuda=no
else
echo Found CUB at $cub_path
fi
fi
config=$build_top/Config.make
echo Generating $config
echo "#Configuration file for cntk" > $config
@ -388,10 +464,15 @@ esac
if test $enable_cuda = yes ; then
echo CUDA_PATH=$cuda_path >> $config
echo GDK_PATH=$gdk_path >> $config
echo CUB_PATH=$cub_path >> $config
fi
if test x$kaldi_path != x ; then
echo KALDI_PATH=$kaldi_path >> $config
fi
if test x$opencv_path != x ; then
echo Found OpenCV at $opencv_path
echo OPENCV_PATH=$opencv_path >> $config
fi
# If we are not in the configure directory, generate a trampoline Makefile
makefile=$build_top/Makefile